I am trying to run the below notebook through databricks but getting the below error. I have tried to update the notebook timeout and the retry mechanism but still no luck yet.
NotebookData("/Users/mynotebook",9900, retry=3)
]
res = parallelNotebooks(notebooks, 2)
result = [f.result(timeout=9900) for f in res] # This is a blocking call.
print(result)
Can someone please help me to sort out this issue? Thanks
%python
from concurrent.futures import ThreadPoolExecutor
class NotebookData:
def __init__(self, path, timeout, parameters=None, retry=0):
self.path = path
self.timeout = timeout
self.parameters = parameters
self.retry = retry
def submitNotebook(notebook):
print("Running notebook %s" % notebook.path)
try:
if (notebook.parameters):
return dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
else:
return dbutils.notebook.run(notebook.path, notebook.timeout)
except Exception:
if notebook.retry < 1:
raise
print("Retrying notebook %s" % notebook.path)
notebook.retry = notebook.retry - 1
submitNotebook(notebook)
def parallelNotebooks(notebooks, numInParallel):
# This code limits the number of parallel notebooks.
with ThreadPoolExecutor(max_workers=numInParallel) as ec:
return [ec.submit(submitNotebook, notebook) for notebook in notebooks]
notebooks = [
NotebookData("/Users/mynotebook",1200000, retry=0)
]
res = parallelNotebooks(notebooks, 2)
result = [f.result(timeout=1200000) for f in res] # This is a blocking call.
print(result)
Error:
Py4JJavaError Traceback (most recent call last)
<command-1143841910698378> in <module>
32 ]
33 res = parallelNotebooks(notebooks, 2)
---> 34 result = [f.result(timeout=1200000) for f in res] # This is a blocking call.
35 print(result)
<command-1143841910698378> in <listcomp>(.0)
32 ]
33 res = parallelNotebooks(notebooks, 2)
---> 34 result = [f.result(timeout=1200000) for f in res] # This is a blocking call.
35 print(result)
/usr/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
426 raise CancelledError()
427 elif self._state == FINISHED:
--> 428 return self.__get_result()
429
430 self._condition.wait(timeout)
/usr/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
/usr/lib/python3.7/concurrent/futures/thread.py in run(self)
55
56 try:
---> 57 result = self.fn(*self.args, **self.kwargs)
58 except BaseException as exc:
59 self.future.set_exception(exc)
<command-1143841910698378> in submitNotebook(notebook)
12 return dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
13 else:
---> 14 return dbutils.notebook.run(notebook.path, notebook.timeout)
15 except Exception:
16 if notebook.retry < 1:
/local_disk0/tmp/1664351986642-0/dbutils.py in run(self, path, timeout_seconds, arguments, _NotebookHandler__databricks_internal_cluster_spec)
136 arguments,
137 __databricks_internal_cluster_spec,
--> 138 self.shell.currentJobGroup)
139
140 def __repr__(self):
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer, self.gateway_client, self.target_id, self.name)
1306
1307 for temp_arg in temp_args:
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
125 def deco(*a, **kw):
126 try:
--> 127 return f(*a, **kw)
128 except py4j.protocol.Py4JJavaError as e:
129 converted = convert_exception(e.java_exception)
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
Py4JJavaError: An error occurred while calling o1741._run.
: com.databricks.WorkflowException: com.databricks.NotebookExecutionException: FAILED
at com.databricks.workflow.WorkflowDriver.run(WorkflowDriver.scala:95)
at com.databricks.dbutils_v1.impl.NotebookUtilsImpl.run(NotebookUtilsImpl.scala:122)
at com.databricks.dbutils_v1.impl.NotebookUtilsImpl._run(NotebookUtilsImpl.scala:89)
at sun.reflect.GeneratedMethodAccessor820.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
at py4j.Gateway.invoke(Gateway.java:295)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:251)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.databricks.NotebookExecutionException: FAILED
at com.databricks.workflow.WorkflowDriver.run0(WorkflowDriver.scala:141)
at com.databricks.workflow.WorkflowDriver.run(WorkflowDriver.scala:90)
... 12 more