0

I am trying to parse data from an XML file through Spark using the Databricks spark-xml library.

Here is my code:

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions
import java.text.Format
import org.apache.spark.sql.functions.concat_ws
import org.apache.spark.sql
import org.apache.spark.sql.types._
import org.apache.spark.sql.catalyst.plans.logical.With
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.functions.udf
import scala.sys.process._
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.functions.udf
import org.apache.spark.sql.functions._ 
/**
 * Reads an XML file through the `com.databricks.spark.xml` data source, using
 * `us-bibliographic-data-application` as the row tag, and prints the
 * application document number of every row.
 *
 * Both output columns (`document` and `document_number`) are derived from the
 * same source field, `application-reference.document-id.doc-number`, cast to
 * a string, so each printed tuple contains the same value twice.
 */
object printschema 
{
  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath = "/Users/praveen/Desktop/ipa0105.xml"

  /**
   * Program entry point.
   *
   * @param args optional; `args(0)` is the path of the XML file to read.
   *             When absent, [[DefaultInputPath]] is used.
   */
  def main(args: Array[String]): Unit = 
  {
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    val conf = new SparkConf().setAppName("printschema")
    // A master set explicitly on the SparkConf takes precedence over the
    // --master flag of spark-submit, so only fall back to "local" when no
    // master has been provided from outside (e.g. when run from an IDE).
    conf.setIfMissing("spark.master", "local")
    conf.set("spark.debug.maxToStringFields", "10000000")

    val context = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(context)
      import sqlContext.implicits._

      val df = sqlContext.read.format("com.databricks.spark.xml")
        .option("rowTag", "us-bibliographic-data-application")
        .option("treatEmptyValuesAsNulls", true)
        .load(inputPath)

      // Single definition of the source column, reused for both outputs.
      val docNumber =
        $"application-reference.document-id.doc-number".cast(sql.types.StringType)

      val rows = df
        .withColumn("document", docNumber)
        .withColumn("document_number", docNumber)
        .select("document", "document_number")
        .collect()

      // Explicit tuple: same "(a,b)" output as before, without relying on
      // the compiler adapting a two-argument println call into a tuple.
      rows.foreach { row => println((row.get(0), row.get(1))) }
    } finally {
      // Always release the SparkContext, even if reading or collecting fails.
      context.stop()
    }
  }
}

When I run the code in Scala IDE or IntelliJ IDEA, it works fine. Here is my output:

(14789882,14789882)
(14755945,14755945)
(14755919,14755919)

But when I build a jar and run it with spark-submit, it simply returns null values:

OUTPUT :

NULL,NULL
NULL,NULL
NULL,NULL

Here is my spark-submit command:

./spark-submit --jars /home/hadoop/spark-xml_2.11-0.4.0.jar --class inndata.praveen --master local[2] /home/hadoop/ip/target/scala-2.11/ip_2.11-1.0.jar

user6325753
  • 585
  • 4
  • 10
  • 33

0 Answers0