I want to submit a Spark job from my local system to a remote server where Cloudera is installed (Spark on YARN). I have tried all the possibilities I could find.
I am trying to create a SparkSession and a SparkContext.
The following is the code:
1)--------------------------------------------------
from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession

appName = 'TEST_ON_SPARK'
masterUrl = 'yarn'

spark = None
try:
    # point Spark directly at the remote NameNode and ResourceManager
    spark = SparkSession.builder.appName(appName).master(masterUrl).\
        config("spark.hadoop.fs.defaultFS", "hdfs://192.168.XX.XX:8020").\
        config("spark.hadoop.yarn.resourcemanager.address", "192.168.XX.XX:8032").\
        getOrCreate()
except Exception as e:
    print(e)
    raise e
finally:
    # stop the session if it was created
    if spark is not None:
        spark.stop()
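As I understand it, with master 'yarn' Spark normally discovers the ResourceManager and NameNode from the Hadoop client configuration files rather than from individual spark.hadoop.* settings, so I also tried downloading the cluster's client configs from Cloudera Manager (core-site.xml, hdfs-site.xml, yarn-site.xml) and pointing HADOOP_CONF_DIR at them before building the session. A minimal sketch; the config path is a placeholder for wherever the files are unpacked:

import os
from pyspark.sql import SparkSession

# placeholder path: local copy of the cluster's client configuration
# (core-site.xml, hdfs-site.xml, yarn-site.xml)
os.environ["HADOOP_CONF_DIR"] = "/home/me/cluster-conf"
os.environ["YARN_CONF_DIR"] = "/home/me/cluster-conf"

# the JVM is only launched by getOrCreate(), so the variables set above
# are visible to it
spark = SparkSession.builder.appName('TEST_ON_SPARK').master('yarn').getOrCreate()
print(spark.sparkContext.applicationId)  # expect an application_... id from YARN
spark.stop()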
2)--------------------------------------------------
Also tried setting everything through SparkConf:
spark = None
sparkContext = None  # initialise so the finally block cannot raise NameError
try:
    conf = SparkConf().setAppName(appName).setMaster(masterUrl).\
        set("spark.hadoop.fs.defaultFS", "hdfs://192.168.XX.XX").\
        set("spark.hadoop.yarn.resourcemanager.hostname", "192.168.XX.XX").\
        set("spark.submit.deployMode", "cluster").\
        set("spark.hadoop.yarn.resourcemanager.address", "192.168.XX.XX:8032").\
        set("spark.yarn.access.namenodes", "hdfs://192.168.XX.XX:8020,hdfs://192.168.XX.XX:8020").\
        set("spark.yarn.stagingDir", "hdfs://192.168.XX.XX:8020/user/username.surname/")
    # dump the effective configuration for debugging
    for i in conf.getAll():
        print(i)
    print(1)  # progress marker: before SparkContext creation
    sparkContext = SparkContext(conf=conf)
    print(2)  # progress marker: SparkContext was created
    spark = SparkSession.builder.config(conf=conf).getOrCreate()
except Exception as e:
    print(e)
    raise e
finally:
    if spark is not None:
        spark.stop()
    if sparkContext is not None:
        sparkContext.stop()
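From what I have read, deployMode 'cluster' is not applicable when the SparkContext is created programmatically (Spark expects the job to go through spark-submit for that), so I also tried client mode, where the driver runs on my machine and the YARN executors have to connect back to it. A minimal sketch; the driver IP is an assumption for my machine's address as seen from the cluster:

from pyspark import SparkConf
from pyspark.sql import SparkSession

conf = SparkConf().setAppName('TEST_ON_SPARK').setMaster('yarn').\
    set("spark.submit.deployMode", "client").\
    set("spark.driver.host", "192.168.YY.YY")  # assumed: my local IP, reachable from the cluster nodes

spark = SparkSession.builder.config(conf=conf).getOrCreate()
# small smoke test that actually runs tasks on the executors
print(spark.range(10).count())
spark.stop()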