0

I am calling a simple dfq.head() on a Koalas DataFrame, but I get the error below.

I know this is not related to what my data looks like, but rather to the versions of the libraries I am using. However, I can't figure out the issue.

This is my spark declaration:

# Build (or reuse) a Hive-enabled Spark session named "my_app".
# NOTE(review): SparkSession is not imported in this snippet; presumably
# `from pyspark.sql import SparkSession` appears earlier -- confirm.
spark = (SparkSession
         .builder
         .appName("my_app")
         # NOTE(review): the four settings below appear to export the
         # ARROW_PRE_0_15_IPC_FORMAT and PYARROW_IGNORE_TIMEZONE environment
         # variables (value 1) to the YARN application master and to the
         # executors -- confirm against the Spark configuration docs.
         .config('spark.yarn.appMasterEnv.ARROW_PRE_0_15_IPC_FORMAT', 1)
         .config('spark.executorEnv.ARROW_PRE_0_15_IPC_FORMAT', 1)
         .config('spark.yarn.appMasterEnv.PYARROW_IGNORE_TIMEZONE', 1)
         .config('spark.executorEnv.PYARROW_IGNORE_TIMEZONE', 1)
         .enableHiveSupport()
         .getOrCreate())

# Load the table 'sometable' through the session's reader.
df = spark.read.table('sometable')

# NOTE(review): importing databricks.koalas is presumably what makes
# to_koalas() available on the Spark DataFrame -- confirm in the Koalas docs.
import databricks.koalas as koalas
dfq = df.to_koalas()
# This call is the one that fails: per the traceback it goes through
# koalas' to_pandas_frame -> pyspark toPandas() -> _collectAsArrow().
dfq.head()

Error:

~/.local/lib/python3.7/site-packages/databricks/koalas/internal.py in to_pandas_frame(self)
    932         """ Return as pandas DataFrame. """
    933         sdf = self.to_internal_spark_frame
--> 934         pdf = sdf.toPandas()
    935         if len(pdf) == 0 and len(sdf.schema) > 0:
    936             pdf = pdf.astype(

~/.local/lib/python3.7/site-packages/pyspark/sql/dataframe.py in toPandas(self)
   2127                         _check_dataframe_localize_timestamps
   2128                     import pyarrow
-> 2129                     batches = self._collectAsArrow()
   2130                     if len(batches) > 0:
   2131                         table = pyarrow.Table.from_batches(batches)

~/.local/lib/python3.7/site-packages/pyspark/sql/dataframe.py in _collectAsArrow(self)
   2185         with SCCallSiteSync(self._sc):
   2186             from pyspark.rdd import _load_from_socket
-> 2187             port, auth_secret, jsocket_auth_server = self._jdf.collectAsArrowToPython()
   2188             try:
   2189                 return list(_load_from_socket((port, auth_secret), ArrowStreamSerializer()))

ValueError: not enough values to unpack (expected 3, got 2)
Exception in thread "serve-Arrow" java.net.SocketTimeoutException: Accept timed out
    at java.net.PlainSocketImpl.socketAccept(Native Method)
    at java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:409)
    at java.net.ServerSocket.implAccept(ServerSocket.java:560)
    at java.net.ServerSocket.accept(ServerSocket.java:528)
    at org.apache.spark.api.python.PythonServer$$anon$1.run(PythonRDD.scala:883)
Exception in thread "serve-Arrow" java.net.SocketTimeoutException: Accept timed out
    at java.net.PlainSocketImpl.socketAccept(Native Method)
    at java.net.AbstractPlainSocketImpl.accept(AbstractPlainSocketImpl.java:409)
    at java.net.ServerSocket.implAccept(ServerSocket.java:560)
    at java.net.ServerSocket.accept(ServerSocket.java:528)
    at org.apache.spark.api.python.PythonServer$$anon$1.run(PythonRDD.scala:883)
heinistic
  • 731
  • 2
  • 8
  • 16

0 Answers0