I am trying to use Numba with Dask for a simple groupby operation on a dataset. It works fine on a single machine, but when I run it on a distributed cluster it raises an error that I am unable to resolve. Please help. Thanks in advance.
This is the code.
import pandas as pd
import time
import dask as dask
import dask.distributed as distributed
import dask.dataframe as dd
import dask.delayed as delayed
from dask.distributed import Client,progress
from numba import jit
def group_sum(data_frame):
    """Sum ``col2`` within each ``col1`` group of a dask dataframe.

    Args:
        data_frame: A dask dataframe that has ``col1`` and ``col2`` columns.
            ``split_out`` is a dask-specific keyword, so a plain pandas
            dataframe is not expected here.

    Returns:
        The lazy grouped-sum dataframe, with the aggregated output spread
        over 10 partitions (``split_out=10``).
    """
    # Deliberately not wrapped in numba's ``@jit``: this function only chains
    # dataframe method calls, which Numba cannot compile to native code.  In
    # object mode the decorator adds overhead without any speed-up, and on
    # Numba releases where ``@jit`` defaults to nopython mode it fails.
    return data_frame.groupby(['col1'])[['col2']].sum(split_out=10)
# Connect to an already-running scheduler; 'IP:PORT' is a placeholder for
# the scheduler's address.
client = Client('IP:PORT')
# print(...) with a single argument behaves the same on Python 2 and 3,
# unlike the Python-2-only print statement.
print(client)
print(client.scheduler_info())

# Read the CSV on the client in chunks of ``chunksize`` rows and scatter each
# chunk to the cluster; ``f`` collects the resulting futures.
f = []
chunksize = 10 ** 6
for chunk in pd.read_csv('file.csv', chunksize=chunksize):
    f_in = client.scatter(chunk)
    f.append(f_in)

# Build one dask dataframe out of the scattered chunks and aggregate it.
ddf = dd.from_delayed(f)
ddf = group_sum(ddf)

# Submit the delayed pieces of the result to the cluster, show progress while
# they run, then pull the computed results back to the client.
c = ddf.to_delayed()
future = client.compute(c)
progress(future)
result = client.gather(future)
print(result)
The error traceback is as follows:
f_in = client.scatter(chunk)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 1773, in scatter
asynchronous=asynchronous, hash=hash)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 652, in sync
return sync(self.loop, func, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 275, in sync
six.reraise(*error[0])
File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 260, in f
result[0] = yield make_coro()
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 1641, in _scatter
timeout=timeout)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 552, in send_recv_from_rpc
result = yield send_recv(comm=comm, op=key, **kwargs)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1113, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 446, in send_recv
six.reraise(*clean_exception(**response))
File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 321, in handle_comm
result = yield result
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/scheduler.py", line 2155, in scatter
report=False)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/utils_comm.py", line 128, in scatter_to_workers
for address, v in d.items()])
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 208, in All
result = yield tasks.next()
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 610, in send_recv_from_rpc
result = yield send_recv(comm=comm, op=key, **kwargs)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1113, in run
yielded = self.gen.send(value)
File "/usr/local/lib/python2.7/site-packages/distributed/core.py", line 446, in send_recv
six.reraise(*clean_exception(**response))
File "<string>", line 3, in reraise
TypeError: raise: arg 3 must be a traceback or None