1

I am trying to pass a Typesafe config file to the spark submit task and print the details in the config file.

import org.slf4j.{Logger, LoggerFactory}
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.spark.sql.SparkSession
 
object Bootstrap extends MyLogging {
 
 val spark: SparkSession = SparkSession.builder.enableHiveSupport().getOrCreate()
 val config: Config = ConfigFactory.load("application.conf")
 
 def main(args: Array[String]): Unit = {
  val url: String = config.getString("db.url")
  val user: String = config.getString("db.user")
  println(url)
  println(user)
 }
}

application.conf file :

db {
  url = "jdbc:postgresql://localhost:5432/test"
  user = "test"
}

I have uploaded the application.conf file to the dbfs and using the same path to create the job.

Spark submit job JSON :

{
  "new_cluster": {
    "spark_version": "6.4.x-esr-scala2.11",
    "azure_attributes": {
      "availability": "ON_DEMAND_AZURE",
      "first_on_demand": 1,
      "spot_bid_max_price": -1
    },
    "node_type_id": "Standard_DS3_v2",
    "enable_elastic_disk": true,
    "num_workers": 1
  },
  "spark_submit_task": {
    "parameters": [
      "--class",
      "Bootstrap",
      "--conf",
      "spark.driver.extraClassPath=dbfs:/tmp/",
      "--conf",
      "spark.executor.extraClassPath=dbfs:/tmp/",
      "--files",
      "dbfs:/tmp/application.conf",
      "dbfs:/tmp/code-assembly-0.1.0.jar"
    ]
  },
  "email_notifications": {},
  "name": "application-conf-test",
  "max_concurrent_runs": 1
}

I have used above json to create the spark submit job and tried to run the spark-submit job using datbricks CLI commands.

Error :

Exception in thread "main" com.typesafe.config.ConfigException$Missing: No configuration setting found for key 'db'
    at com.typesafe.config.impl.SimpleConfig.findKey(SimpleConfig.java:124)
    at com.typesafe.config.impl.SimpleConfig.find(SimpleConfig.java:147)
    at com.typesafe.config.impl.SimpleConfig.find(SimpleConfig.java:159)
    at com.typesafe.config.impl.SimpleConfig.find(SimpleConfig.java:164)
    at com.typesafe.config.impl.SimpleConfig.getString(SimpleConfig.java:206)
    at Bootstrap$.main(Test.scala:16)
    at Bootstrap.main(Test.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
    at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
    at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
    at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
    at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
    at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)

I can see the below line in logs but the file is not getting loaded.

21/09/22 07:21:43 INFO SparkContext: Added file dbfs:/tmp/application.conf at dbfs:/tmp/application.conf with timestamp 1632295303654
21/09/22 07:21:43 INFO Utils: Fetching dbfs:/tmp/application.conf to /local_disk0/spark-20456b30-fddd-42d7-9b23-9e4c0d3c91cd/userFiles-ee199161-6f48-4c47-b1c7-763ce7c0895f/fetchFileTemp4713981355306806616.tmp

Please help me in passing this typesafe config file to the spark-submit job using the appropriate spark submit job parameters.

We have tried below spark_submit_task parameters in the above json but still facing the same issue

[
  "--class",
  "Bootstrap",
  "--conf",
  "spark.driver.extraClassPath=/tmp/application.conf",
  "--files",
  "dbfs:/tmp/application.conf",
  "dbfs:/tmp/code-assembly-0.1.0.jar"
]
[
  "--class",
  "Bootstrap",
  "--conf",
  "spark.driver.extraClassPath=/tmp/",
  "--conf",
  "spark.executor.extraClassPath=/tmp/",
  "--files",
  "dbfs:/tmp/application.conf",
  "dbfs:/tmp/code-assembly-0.1.0.jar"
]
[
  "--class",
  "Bootstrap",
  "--conf",
  "spark.driver.extraClassPath=dbfs:/tmp/application.conf",
  "--conf",
  "spark.executor.extraClassPath=dbfs:/tmp/application.conf",
  "--files",
  "dbfs:/tmp/application.conf",
  "dbfs:/tmp/code-assembly-0.1.0.jar"
]
[
  "--class",
  "Bootstrap",
  "--conf",
  "spark.driver.extraClassPath=dbfs:/tmp/",
  "--conf",
  "spark.executor.extraClassPath=dbfs:/tmp/",
  "--files",
  "dbfs:/tmp/application.conf",
  "dbfs:/tmp/code-assembly-0.1.0.jar"
]
[
  "--class",
  "Bootstrap",
  "--conf",
  "spark.driver.extraClassPath=dbfs:./",
  "--conf",
  "spark.executor.extraClassPath=dbfs:./",
  "--files",
  "dbfs:/tmp/application.conf",
  "dbfs:/tmp/code-assembly-0.1.0.jar"
]
[
  "--class",
  "Bootstrap",
  "--driver-java-options",
  "-Dconfig.file=application.conf",
  "--conf",
  "spark.executor.extraJavaOptions=-Dconfig.file=application.conf",
  "--files",
  "dbfs:/tmp/application.conf",
  "dbfs:/tmp/code-assembly-0.1.0.jar"
]

[
  "--class",
  "Bootstrap",
  "--conf",
  "spark.driver.extraJavaOptions=-Dconfig.file=application.conf",
  "--conf",
  "spark.executor.extraJavaOptions=-Dconfig.file=application.conf",
  "--files",
  "dbfs:/tmp/application.conf",
  "dbfs:/tmp/code-assembly-0.1.0.jar"
]
Alex Ott
  • 80,552
  • 8
  • 87
  • 132

1 Answers1

0

It's easier to pass the file name explicitly as a parameter to a job, and refer to it as /dbfs/tmp/application.conf (you'll need to handle that parameter in your code):

[
  "--class",
  "Bootstrap",
  "dbfs:/tmp/code-assembly-0.1.0.jar",
  "/dbfs/tmp/application.conf"
]

or refer via extra options:

[
  "--class",
  "Bootstrap",
  "--conf",
  "spark.driver.extraJavaOptions=-Dconfig.file=/dbfs/tmp/application.conf",
  "dbfs:/tmp/code-assembly-0.1.0.jar"
]
Alex Ott
  • 80,552
  • 8
  • 87
  • 132
  • Hi , I have tried above jsons for spark-submit , but did not worked. Can you please give us the code and the exact json that you are using for the spark-submit , it would help me a alot ! – Praveen Kumar Oct 28 '21 at 12:32