I am forecasting values for several thousand independent objects. The scripts run on Databricks, and every forecast takes several seconds, so I would like to try parallelisation in the hope of a speed-up.
Toy task
Suppose I have the following task
def task(k):
    import numpy as np
    # busy loop that burns CPU for roughly two seconds per call
    N = np.int32(1e6)
    for _ in range(N):
        __ = np.random.rand()
        __ ** np.exp(__)
which takes approximately 2 seconds to execute.
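The timing can be reproduced with a quick wall-clock check like the following (just a sketch; exact numbers will of course vary by machine):

import time

start = time.perf_counter()
task(0)
print(f"{time.perf_counter() - start:.2f} s")   # roughly 2 s on my cluster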
Using Pathos
The following snippet
import pathos
pathos.pools.ProcessPool().map(task, range(10))
running on a single-node cluster produces this (not very helpful) error message:
/databricks/python/lib/python3.7/site-packages/pathos/multiprocessing.py in map(self, f, *args, **kwds)
135 AbstractWorkerPool._AbstractWorkerPool__map(self, f, *args, **kwds)
136 _pool = self._serve()
--> 137 return _pool.map(star(f), zip(*args)) # chunksize
138 map.__doc__ = AbstractWorkerPool.map.__doc__
139 def imap(self, f, *args, **kwds):
/databricks/python/lib/python3.7/site-packages/multiprocess/pool.py in map(self, func, iterable, chunksize)
266 in a list that is returned.
267 '''
--> 268 return self._map_async(func, iterable, mapstar, chunksize).get()
269
270 def starmap(self, func, iterable, chunksize=None):
/databricks/python/lib/python3.7/site-packages/multiprocess/pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
658
659 def _set(self, i, obj):
/databricks/python/lib/python3.7/site-packages/multiprocess/pool.py in _handle_tasks(taskqueue, put, outqueue, pool, cache)
429 break
430 try:
--> 431 put(task)
432 except Exception as e:
433 job, idx = task[:2]
/databricks/python/lib/python3.7/site-packages/multiprocess/connection.py in send(self, obj)
207 self._check_closed()
208 self._check_writable()
--> 209 self._send_bytes(_ForkingPickler.dumps(obj))
210
211 def recv_bytes(self, maxlength=None):
/databricks/python/lib/python3.7/site-packages/multiprocess/reduction.py in dumps(cls, obj, protocol, *args, **kwds)
52 def dumps(cls, obj, protocol=None, *args, **kwds):
53 buf = io.BytesIO()
---> 54 cls(buf, protocol, *args, **kwds).dump(obj)
55 return buf.getbuffer()
56
/databricks/python/lib/python3.7/site-packages/dill/_dill.py in dump(self, obj)
452 raise PicklingError(msg)
453 else:
--> 454 StockPickler.dump(self, obj)
455 stack.clear() # clear record of 'recursion-sensitive' pickled objects
456 return
/usr/lib/python3.7/pickle.py in dump(self, obj)
435 if self.proto >= 4:
436 self.framer.start_framing()
--> 437 self.save(obj)
438 self.write(STOP)
439 self.framer.end_framing()
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
787 write(MARK)
788 for element in obj:
--> 789 save(element)
790
791 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
772 if n <= 3 and self.proto >= 2:
773 for element in obj:
--> 774 save(element)
775 # Subtle. Same as in the big comment below.
776 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
772 if n <= 3 and self.proto >= 2:
773 for element in obj:
--> 774 save(element)
775 # Subtle. Same as in the big comment below.
776 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/databricks/python/lib/python3.7/site-packages/dill/_dill.py in save_function(pickler, obj)
1445 globs, obj.__name__,
1446 obj.__defaults__, obj.__closure__,
-> 1447 obj.__dict__, fkwdefaults), obj=obj)
1448 else:
1449 _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False)
/usr/lib/python3.7/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
636 else:
637 save(func)
--> 638 save(args)
639 write(REDUCE)
640
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
787 write(MARK)
788 for element in obj:
--> 789 save(element)
790
791 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
772 if n <= 3 and self.proto >= 2:
773 for element in obj:
--> 774 save(element)
775 # Subtle. Same as in the big comment below.
776 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/databricks/python/lib/python3.7/site-packages/dill/_dill.py in save_cell(pickler, obj)
1176 log.info("Ce: %s" % obj)
1177 f = obj.cell_contents
-> 1178 pickler.save_reduce(_create_cell, (f,), obj=obj)
1179 log.info("# Ce")
1180 return
/usr/lib/python3.7/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
636 else:
637 save(func)
--> 638 save(args)
639 write(REDUCE)
640
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
772 if n <= 3 and self.proto >= 2:
773 for element in obj:
--> 774 save(element)
775 # Subtle. Same as in the big comment below.
776 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/databricks/python/lib/python3.7/site-packages/dill/_dill.py in save_function(pickler, obj)
1445 globs, obj.__name__,
1446 obj.__defaults__, obj.__closure__,
-> 1447 obj.__dict__, fkwdefaults), obj=obj)
1448 else:
1449 _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False)
/usr/lib/python3.7/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
636 else:
637 save(func)
--> 638 save(args)
639 write(REDUCE)
640
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
787 write(MARK)
788 for element in obj:
--> 789 save(element)
790
791 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
547
548 # Save the reduce() output and finally memoize the object
--> 549 self.save_reduce(obj=obj, *rv)
550
551 def persistent_id(self, obj):
/usr/lib/python3.7/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
657
658 if dictitems is not None:
--> 659 self._batch_setitems(dictitems)
660
661 if state is not None:
/usr/lib/python3.7/pickle.py in _batch_setitems(self, items)
883 for k, v in tmp:
884 save(k)
--> 885 save(v)
886 write(SETITEMS)
887 elif n:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
522 reduce = getattr(obj, "__reduce_ex__", None)
523 if reduce is not None:
--> 524 rv = reduce(self.proto)
525 else:
526 reduce = getattr(obj, "__reduce__", None)
/databricks/spark/python/pyspark/context.py in __getnewargs__(self)
346 # This method is called when attempting to pickle SparkContext, which is always an error:
347 raise Exception(
--> 348 "It appears that you are attempting to reference SparkContext from a broadcast "
349 "variable, action, or transformation. SparkContext can only be used on the driver, "
350 "not in code that it run on workers. For more information, see SPARK-5063."
I would like to note that pathos.pools.ThreadPool().map(...) runs fine (see the snippet below), but, as expected for this CPU-bound task, it does not produce a speed-up.
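For completeness, the thread-based call referred to above is simply:

import pathos

# Completes without the pickling error, but the work is CPU-bound Python/NumPy
# scalar code, so the GIL prevents any real speed-up with threads.
pathos.pools.ThreadPool().map(task, range(10))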
Using Ray
Initialising Ray already fails, i.e. the snippet
import ray
ray.init()
produces the error message
/databricks/python/lib/python3.7/site-packages/ray/worker.py in init(address, num_cpus, num_gpus, resources, object_store_memory, local_mode, ignore_reinit_error, include_dashboard, dashboard_host, dashboard_port, job_config, configure_logging, logging_level, logging_format, log_to_driver, _enable_object_reconstruction, _redis_max_memory, _plasma_directory, _node_ip_address, _driver_object_store_memory, _memory, _redis_password, _java_worker_options, _code_search_path, _temp_dir, _load_code_from_local, _lru_evict, _metrics_export_port, _object_spilling_config, _system_config)
766 driver_object_store_memory=_driver_object_store_memory,
767 job_id=None,
--> 768 job_config=job_config)
769
770 for hook in _post_init_hooks:
/databricks/python/lib/python3.7/site-packages/ray/worker.py in connect(node, mode, log_to_driver, worker, driver_object_store_memory, job_id, job_config)
1135 try:
1136 if not faulthandler.is_enabled():
-> 1137 faulthandler.enable(all_threads=False)
1138 except io.UnsupportedOperation:
1139 pass # ignore
AttributeError: 'ConsoleBuffer' object has no attribute 'fileno'
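As far as I can tell from the traceback, the problem is that the Databricks notebook replaces the standard streams with a ConsoleBuffer object that has no fileno() method, while faulthandler.enable() needs a real file descriptor. This can be checked with something like:

import sys

# On a Databricks notebook the standard streams are wrapped in ConsoleBuffer,
# which does not expose fileno(), hence the AttributeError inside ray.init().
print(type(sys.stdout), type(sys.stderr))
print(hasattr(sys.stdout, "fileno"), hasattr(sys.stderr, "fileno"))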
The fix discussed here does not work.
Any ideas how else I could achieve this?