I am forecasting values for several thousand independent objects. The scripts run on Databricks, and every forecast takes several seconds, so I would like to try parallelisation in the hope of a speed-up.
Toy task
Suppose I have the following task
def task(k):
    import numpy as np
    # busy loop that burns CPU for roughly two seconds per call
    N = np.int32(1e6)
    for _ in range(N):
        __ = np.random.rand()
        __ ** np.exp(__)
which takes approximately 2 seconds to execute.
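The timing can be reproduced with a quick wall-clock check like the following (just a sketch; exact numbers will of course vary by machine):

import time

start = time.perf_counter()
task(0)
print(f"{time.perf_counter() - start:.2f} s")   # roughly 2 s on my cluster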
Using Pathos
The following snippet
import pathos
pathos.pools.ProcessPool().map(task, range(10))
running on a single-node cluster produces this (not very helpful) error message:
/databricks/python/lib/python3.7/site-packages/pathos/multiprocessing.py in map(self, f, *args, **kwds)
135 AbstractWorkerPool._AbstractWorkerPool__map(self, f, *args, **kwds)
136 _pool = self._serve()
--> 137 return _pool.map(star(f), zip(*args)) # chunksize
138 map.__doc__ = AbstractWorkerPool.map.__doc__
139 def imap(self, f, *args, **kwds):
/databricks/python/lib/python3.7/site-packages/multiprocess/pool.py in map(self, func, iterable, chunksize)
266 in a list that is returned.
267 '''
--> 268 return self._map_async(func, iterable, mapstar, chunksize).get()
269
270 def starmap(self, func, iterable, chunksize=None):
/databricks/python/lib/python3.7/site-packages/multiprocess/pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
658
659 def _set(self, i, obj):
/databricks/python/lib/python3.7/site-packages/multiprocess/pool.py in _handle_tasks(taskqueue, put, outqueue, pool, cache)
429 break
430 try:
--> 431 put(task)
432 except Exception as e:
433 job, idx = task[:2]
/databricks/python/lib/python3.7/site-packages/multiprocess/connection.py in send(self, obj)
207 self._check_closed()
208 self._check_writable()
--> 209 self._send_bytes(_ForkingPickler.dumps(obj))
210
211 def recv_bytes(self, maxlength=None):
/databricks/python/lib/python3.7/site-packages/multiprocess/reduction.py in dumps(cls, obj, protocol, *args, **kwds)
52 def dumps(cls, obj, protocol=None, *args, **kwds):
53 buf = io.BytesIO()
---> 54 cls(buf, protocol, *args, **kwds).dump(obj)
55 return buf.getbuffer()
56
/databricks/python/lib/python3.7/site-packages/dill/_dill.py in dump(self, obj)
452 raise PicklingError(msg)
453 else:
--> 454 StockPickler.dump(self, obj)
455 stack.clear() # clear record of 'recursion-sensitive' pickled objects
456 return
/usr/lib/python3.7/pickle.py in dump(self, obj)
435 if self.proto >= 4:
436 self.framer.start_framing()
--> 437 self.save(obj)
438 self.write(STOP)
439 self.framer.end_framing()
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
787 write(MARK)
788 for element in obj:
--> 789 save(element)
790
791 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
772 if n <= 3 and self.proto >= 2:
773 for element in obj:
--> 774 save(element)
775 # Subtle. Same as in the big comment below.
776 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
772 if n <= 3 and self.proto >= 2:
773 for element in obj:
--> 774 save(element)
775 # Subtle. Same as in the big comment below.
776 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/databricks/python/lib/python3.7/site-packages/dill/_dill.py in save_function(pickler, obj)
1445 globs, obj.__name__,
1446 obj.__defaults__, obj.__closure__,
-> 1447 obj.__dict__, fkwdefaults), obj=obj)
1448 else:
1449 _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False)
/usr/lib/python3.7/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
636 else:
637 save(func)
--> 638 save(args)
639 write(REDUCE)
640
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
787 write(MARK)
788 for element in obj:
--> 789 save(element)
790
791 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
772 if n <= 3 and self.proto >= 2:
773 for element in obj:
--> 774 save(element)
775 # Subtle. Same as in the big comment below.
776 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/databricks/python/lib/python3.7/site-packages/dill/_dill.py in save_cell(pickler, obj)
1176 log.info("Ce: %s" % obj)
1177 f = obj.cell_contents
-> 1178 pickler.save_reduce(_create_cell, (f,), obj=obj)
1179 log.info("# Ce")
1180 return
/usr/lib/python3.7/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
636 else:
637 save(func)
--> 638 save(args)
639 write(REDUCE)
640
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
772 if n <= 3 and self.proto >= 2:
773 for element in obj:
--> 774 save(element)
775 # Subtle. Same as in the big comment below.
776 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/databricks/python/lib/python3.7/site-packages/dill/_dill.py in save_function(pickler, obj)
1445 globs, obj.__name__,
1446 obj.__defaults__, obj.__closure__,
-> 1447 obj.__dict__, fkwdefaults), obj=obj)
1448 else:
1449 _super = ('super' in getattr(obj.func_code,'co_names',())) and (_byref is not None) and getattr(pickler, '_recurse', False)
/usr/lib/python3.7/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
636 else:
637 save(func)
--> 638 save(args)
639 write(REDUCE)
640
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
502 f = self.dispatch.get(t)
503 if f is not None:
--> 504 f(self, obj) # Call unbound method with explicit self
505 return
506
/usr/lib/python3.7/pickle.py in save_tuple(self, obj)
787 write(MARK)
788 for element in obj:
--> 789 save(element)
790
791 if id(obj) in memo:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
547
548 # Save the reduce() output and finally memoize the object
--> 549 self.save_reduce(obj=obj, *rv)
550
551 def persistent_id(self, obj):
/usr/lib/python3.7/pickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
657
658 if dictitems is not None:
--> 659 self._batch_setitems(dictitems)
660
661 if state is not None:
/usr/lib/python3.7/pickle.py in _batch_setitems(self, items)
883 for k, v in tmp:
884 save(k)
--> 885 save(v)
886 write(SETITEMS)
887 elif n:
/usr/lib/python3.7/pickle.py in save(self, obj, save_persistent_id)
522 reduce = getattr(obj, "__reduce_ex__", None)
523 if reduce is not None:
--> 524 rv = reduce(self.proto)
525 else:
526 reduce = getattr(obj, "__reduce__", None)
/databricks/spark/python/pyspark/context.py in __getnewargs__(self)
346 # This method is called when attempting to pickle SparkContext, which is always an error:
347 raise Exception(
--> 348 "It appears that you are attempting to reference SparkContext from a broadcast "
349 "variable, action, or transformation. SparkContext can only be used on the driver, "
350 "not in code that it run on workers. For more information, see SPARK-5063."
I would like to note that pathos.pools.ThreadPool().map(...) runs fine (see the snippet below), but, as expected for this CPU-bound task, it does not produce a speed-up.
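For completeness, the thread-based call referred to above is simply:

import pathos

# Completes without the pickling error, but the work is CPU-bound Python/NumPy
# scalar code, so the GIL prevents any real speed-up with threads.
pathos.pools.ThreadPool().map(task, range(10))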
Using Ray
Initialising Ray already fails, i.e. the snippet
import ray
ray.init()
produces the error message
/databricks/python/lib/python3.7/site-packages/ray/worker.py in init(address, num_cpus, num_gpus, resources, object_store_memory, local_mode, ignore_reinit_error, include_dashboard, dashboard_host, dashboard_port, job_config, configure_logging, logging_level, logging_format, log_to_driver, _enable_object_reconstruction, _redis_max_memory, _plasma_directory, _node_ip_address, _driver_object_store_memory, _memory, _redis_password, _java_worker_options, _code_search_path, _temp_dir, _load_code_from_local, _lru_evict, _metrics_export_port, _object_spilling_config, _system_config)
766 driver_object_store_memory=_driver_object_store_memory,
767 job_id=None,
--> 768 job_config=job_config)
769
770 for hook in _post_init_hooks:
/databricks/python/lib/python3.7/site-packages/ray/worker.py in connect(node, mode, log_to_driver, worker, driver_object_store_memory, job_id, job_config)
1135 try:
1136 if not faulthandler.is_enabled():
-> 1137 faulthandler.enable(all_threads=False)
1138 except io.UnsupportedOperation:
1139 pass # ignore
AttributeError: 'ConsoleBuffer' object has no attribute 'fileno'
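As far as I can tell from the traceback, the problem is that the Databricks notebook replaces the standard streams with a ConsoleBuffer object that has no fileno() method, while faulthandler.enable() needs a real file descriptor. This can be checked with something like:

import sys

# On a Databricks notebook the standard streams are wrapped in ConsoleBuffer,
# which does not expose fileno(), hence the AttributeError inside ray.init().
print(type(sys.stdout), type(sys.stderr))
print(hasattr(sys.stdout, "fileno"), hasattr(sys.stderr, "fileno"))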
The fix discussed here does not work.
Any ideas how else I could achieve this?