I am calling a simple dfq.head()
on a Koalas DataFrame, but I get the error below.
I know this is not related to what my data looks like, but rather to the versions of the libraries I am using. However, I can't figure out what the issue is.
This is my spark declaration:
# Reproduction script: build a Hive-enabled SparkSession named "my_app",
# read a table, convert it to a Koalas DataFrame and call head() on it.
spark = (SparkSession
.builder
.appName("my_app")
# Pass ARROW_PRE_0_15_IPC_FORMAT=1 through both the spark.yarn.appMasterEnv.*
# and spark.executorEnv.* settings (environment variables for the YARN
# application master and the executors, per the Spark configuration docs).
.config('spark.yarn.appMasterEnv.ARROW_PRE_0_15_IPC_FORMAT', 1)
.config('spark.executorEnv.ARROW_PRE_0_15_IPC_FORMAT', 1)
# Same two settings for PYARROW_IGNORE_TIMEZONE=1.
.config('spark.yarn.appMasterEnv.PYARROW_IGNORE_TIMEZONE', 1)
.config('spark.executorEnv.PYARROW_IGNORE_TIMEZONE', 1)
.enableHiveSupport()
.getOrCreate())
# Load the table 'sometable' through the session's reader.
df = spark.read.table('sometable')
# NOTE(review): presumably this import is what makes to_koalas() available on
# the Spark DataFrame used on the next line — confirm against the Koalas docs.
import databricks.koalas as koalas
dfq = df.to_koalas()
# This is the call that fails: per the traceback below it ends up in
# pyspark's toPandas() -> _collectAsArrow(), which raises the ValueError.
# NOTE(review): the Python side unpacks three values from
# collectAsArrowToPython() but only receives two, which suggests the pyspark
# package installed under ~/.local does not match the Spark version of the
# JVM/cluster it is talking to — TODO confirm both versions.
dfq.head()
Error:
~/.local/lib/python3.7/site-packages/databricks/koalas/internal.py in to_pandas_frame(self)
932 """ Return as pandas DataFrame. """
933 sdf = self.to_internal_spark_frame
--> 934 pdf = sdf.toPandas()
935 if len(pdf) == 0 and len(sdf.schema) > 0:
936 pdf = pdf.astype(
~/.local/lib/python3.7/site-packages/pyspark/sql/dataframe.py in toPandas(self)
2127 _check_dataframe_localize_timestamps
2128 import pyarrow
-> 2129 batches = self._collectAsArrow()
2130 if len(batches) > 0:
2131 table = pyarrow.Table.from_batches(batches)
~/.local/lib/python3.7/site-packages/pyspark/sql/dataframe.py in _collectAsArrow(self)
2185 with SCCallSiteSync(self._sc):
2186 from pyspark.rdd import _load_from_socket
-> 2187 port, auth_secret, jsocket_auth_server = self._jdf.collectAsArrowToPython()
2188 try:
2189 return list(_load_from_socket((port, auth_secret), ArrowStreamSerializer()))
ValueError: not enough values to unpack (expected 3, got 2)
Exception in thread "serve-Arrow" java.net.SocketTimeoutException: Accept timed out
at java.net.PlainSocketImpl.socketAccept(Native Method)
at java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:409)
at java.net.ServerSocket.implAccept(ServerSocket.java:560)
at java.net.ServerSocket.accept(ServerSocket.java:528)
at org.apache.spark.api.python.PythonServer$$anon$1.run(PythonRDD.scala:883)
Exception in thread "serve-Arrow" java.net.SocketTimeoutException: Accept timed out
at java.net.PlainSocketImpl.socketAccept(Native Method)
at java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:409)
at java.net.ServerSocket.implAccept(ServerSocket.java:560)
at java.net.ServerSocket.accept(ServerSocket.java:528)
at org.apache.spark.api.python.PythonServer$$anon$1.run(PythonRDD.scala:883)