I have a single cluster deployed using Cloudera Manager, with the Spark parcel installed. Typing pyspark in a shell works, yet running the code below in Jupyter throws an exception.

Code:
import sys
import py4j
from pyspark.sql import SparkSession
from pyspark import SparkContext, SparkConf

conf = SparkConf()
conf.setMaster('yarn-client')  # deprecated alias for master='yarn' in client mode on Spark 2.x
conf.setAppName('SPARK APP')
sc = SparkContext(conf=conf)
# sc = SparkContext.getOrCreate()
# sc.stop()

def mod(x):
    import numpy as np
    return (x, np.mod(x, 2))

# take(10) collects the first ten (value, value mod 2) pairs as a list
rdd = sc.parallelize(range(1000)).map(mod).take(10)
print(rdd)
Exception:
/usr/lib/python3.6/site-packages/pyspark/context.py in _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, jsc, profiler_cls)
187 self._accumulatorServer = accumulators._start_update_server(auth_token)
188 (host, port) = self._accumulatorServer.server_address
--> 189 self._javaAccumulator = self._jvm.PythonAccumulatorV2(host, port, auth_token)
190 self._jsc.sc().register(self._javaAccumulator)
191
TypeError: 'JavaPackage' object is not callable
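
For reference, this error often points to the pyspark package imported by the notebook not matching the Spark jars it attaches to (e.g. a pip-installed pyspark under /usr/lib/python3.6/site-packages versus the parcel's Spark). The snippet below is a minimal diagnostic sketch to compare the two environments; it uses only stock pyspark introspection and nothing specific to my setup:

# Diagnostic sketch: run this in both the working pyspark shell and the
# failing Jupyter kernel, then compare the output. A mismatch between the
# two is a common cause of the 'JavaPackage' object is not callable error.
import sys
import pyspark
print(sys.executable)        # which Python interpreter is running
print(pyspark.__file__)      # where the pyspark module was loaded from
print(pyspark.__version__)   # should match the cluster's Spark version

If the two environments disagree, one possible way to align them is to point the notebook at the parcel's Spark installation (for example by setting SPARK_HOME before starting Jupyter, or via findspark), though I have not confirmed that is the issue here.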