I start spark-shell with the spark-csv package:

spark-shell --packages com.databricks:spark-csv_2.11:1.5.0

Then I try to read a CSV file:

scala> val flightData2015 = spark.read.option("inferSchema", "true").option("header", "true").csv("/Users/me/Desktop/2015-summary.csv")

I am getting this exception:

java.io.IOException: Failed to connect to /192.168.1.112:51351
  at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:232)
  at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:182)
  at org.apache.spark.rpc.netty.NettyRpcEnv.downloadClient(NettyRpcEnv.scala:366)
  at org.apache.spark.rpc.netty.NettyRpcEnv.openChannel(NettyRpcEnv.scala:332)
  at org.apache.spark.util.Utils$.doFetchFile(Utils.scala:654)
  at org.apache.spark.util.Utils$.fetchFile(Utils.scala:480)
  at org.apache.spark.executor.Executor$$anonfun$org$apache$spark$executor$Executor$$updateDependencies$5.apply(Executor.scala:696)
  at org.apache.spark.executor.Executor$$anonfun$org$apache$spark$executor$Executor$$updateDependencies$5.apply(Executor.scala:688)
  at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:733)
  at scala.collection.Iterator$class.foreach(Iterator.scala:893)
  at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
  at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
  at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
  at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:732)
  at org.apache.spark.executor.Executor.org$apache$spark$executor$Executor$$updateDependencies(Executor.scala:688)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:308)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
Caused by: io.netty.channel.AbstractChannel$AnnotatedConnectException: Operation timed out: /192.168.1.112:51351
  at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
  at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
  at io.netty.channel.socket.nio.NioSocketChannel.doFinishConnect(NioSocketChannel.java:257)
  at io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.finishConnect(AbstractNioChannel.java:291)
  at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:631)
  at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:566)
  at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:480)
  at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:442)
  at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:131)
  at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)
  ... 1 more
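The trace shows the executor timing out while fetching dependencies from the driver at 192.168.1.112:51351, i.e. the executor cannot reach the driver over the machine's LAN address. One commonly suggested mitigation for this class of failure in local mode (an assumption here, not something confirmed in this thread) is to pin the driver to the loopback interface when starting the shell:

spark-shell --conf spark.driver.bindAddress=127.0.0.1 --conf spark.driver.host=localhost

Exporting SPARK_LOCAL_IP=127.0.0.1 before launching has a similar effect of keeping driver/executor traffic off a LAN address that may change or be firewalled.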

  • I am not sure; I set spark.conf.set("spark.sql.shuffle.partitions", "5") and it worked, but the second time it stopped working again even with this configuration. – Sarath Sasikumar Jul 09 '19 at 14:16
  • Why are you using the databricks package in your spark-shell? It is not being used in your read command. What version of Spark are you running? What does your Spark context look like? What is your master set to? Post those things to the community. I think your syntax for reading a CSV file looks good. – Aaron Jul 09 '19 at 15:04
  • I am using Spark version **spark-2.2.0-bin-hadoop2.7**. The Databricks package is used to get the dependencies for reading the CSV format, but even if I use just **spark-shell** I get this error. I am running Spark in local standalone mode. – Sarath Sasikumar Jul 10 '19 at 00:36
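To expand on the comment above: in Spark 2.x the CSV reader is built in, so the read shown in the question never touches the spark-csv package; that package would only be exercised through an explicit format call. A minimal sketch, reusing the question's file path and assuming a Spark 2.x session:

scala> // Built-in CSV data source -- no external package required in Spark 2.x
scala> val flightData2015 = spark.read.option("inferSchema", "true").option("header", "true").csv("/Users/me/Desktop/2015-summary.csv")

scala> // Only this form actually routes through the com.databricks:spark-csv package
scala> val viaPackage = spark.read.format("com.databricks.spark.csv").option("header", "true").option("inferSchema", "true").load("/Users/me/Desktop/2015-summary.csv")

Either way, dropping the redundant --packages flag removes one variable when reproducing the connection error.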

0 Answers