I'm hoping someone can help me debug an issue we're seeing with subclassed ndarrays
in Spark. Specifically, when we broadcast a subclassed array, it seems to lose the extra information. A trivial example is below:
>>> import numpy as np
>>>
>>> class Test(np.ndarray):
... def __new__(cls, input_array, info=None):
... obj = np.asarray(input_array).view(cls)
... obj.info = info
... return obj
...
... def __array_finalize__(self, obj):
... if not hasattr(self, "info"):
... self.info = getattr(obj, 'info', None)
... else:
... print("has info attribute: %s" % getattr(self, 'info'))
...
>>> test = Test(np.array([[1,2,3],[4,5,6]]), info="info")
>>> print(test.info)
info
>>> print(sc.broadcast(test).value)
[[1 2 3]
[4 5 6]]
>>> print(sc.broadcast(test).value.info)
None