0

package Product

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType, TimestampType}

/**
 * Reads two local CSV files (user sessions and product purchases), joins them,
 * aggregates spend per user, and loads the user CSV into a Hive external table.
 *
 * NOTE(review): the "INFO Persistence: ... MFieldSchema ... object is embedded, so ignored"
 * message is a harmless DataNucleus INFO log from the embedded Hive metastore, not an
 * error — it can be silenced via log4j configuration. The actual load failure here is
 * the malformed LOCATION URI (fixed below).
 */
object ProductUser {

  def main(args: Array[String]): Unit = {

    // Local Spark session; Hive support is required for CREATE EXTERNAL TABLE / LOAD DATA.
    val spark = SparkSession.builder
      .appName("Spark_Bigdata")
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()

    val csvUser    = "C:\\Users\\QTR1KOR\\Documents\\UserSession.csv"
    val csvProduct = "C:\\Users\\QTR1KOR\\Documents\\ProductPurchase.csv"

    // Schema for UserSession.csv
    val userSchema = StructType(List(
      StructField("user_id", IntegerType, nullable = false),
      StructField("name", StringType, nullable = false),
      StructField("Time", TimestampType, nullable = false),
      StructField("location", StringType, nullable = false)))

    // Schema for ProductPurchase.csv
    val productSchema = StructType(List(
      StructField("Id", IntegerType, nullable = false),
      StructField("Product_id", IntegerType, nullable = false),
      StructField("Product_name", StringType, nullable = false),
      StructField("Price", IntegerType, nullable = false)))

    // DataFrames for user sessions and product purchases.
    val userDf = spark.read
      .option("header", "true")
      .schema(userSchema)
      .csv(csvUser)
    val productDf = spark.read
      .option("header", "true")
      .schema(productSchema)
      .csv(csvProduct)

    // Inner join on user_id = Id; column case now matches the declared schema
    // exactly ("Price", not "price") instead of relying on Spark's default
    // case-insensitive resolution.
    val joinedData = userDf
      .join(productDf, userDf("user_id") === productDf("Id"), "inner")
      .selectExpr("user_id", "name", "Time", "Price")

    // Total spend per user.
    val totalPerUser = joinedData.groupBy("user_id", "name").sum("Price")
    totalPerUser.show()

    spark.sql("CREATE DATABASE IF NOT EXISTS Db")
    spark.catalog.listDatabases.show(false)

    // External table over the local filesystem. LOCATION must be a valid URI:
    // 'C://Users//...' is not one — on Windows without HDFS use file:///C:/... .
    spark.sql(
      """CREATE EXTERNAL TABLE IF NOT EXISTS Db.user_ex_table(
        |  Id Int, name String, Time Timestamp, location String)
        |ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
        |STORED AS TEXTFILE
        |LOCATION 'file:///C:/Users/QTR1KOR/IdeaProjects/user_ex_table'""".stripMargin)

    // createOrReplaceTempView returns Unit — do not bind it to a val.
    userDf.createOrReplaceTempView("user_Temp")

    // LOCAL INPATH copies the CSV from the local filesystem into the table location.
    spark.sql(
      "LOAD DATA LOCAL INPATH 'C:/Users/QTR1KOR/Documents/UserSession.csv' " +
        "INTO TABLE Db.user_ex_table")

    spark.stop()
  }
}

INFO Persistence: Request to load fields "comment,name,type" of class org.apache.hadoop.hive.metastore.model.MFieldSchema but object is embedded, so ignored

This is the message I need to clear so that the data loads into the external table. I am not using HDFS (local filesystem only). IDE: IntelliJ IDEA.

  • 1
    Update Spark to 1.5. See https://issues.apache.org/jira/browse/SPARK-8435?page=com.atlassian.jira.plugin.system.issuetabpanels%3Aall-tabpanel – Konstantin Annikov Apr 19 '22 at 08:07
  • Stack overflow thrives on well written questions, as a community we should be asking questions which solve specific problems. To do that it is very important that when asking questions we provide a [minimal reproducible example](https://stackoverflow.com/help/minimal-reproducible-example). When the code relies on external data, it's important to find a way to include them. Please also review the page [How do I ask a good question](https://stackoverflow.com/help/how-to-ask)? for more tips. Asking a question well helps, helps the community understand your issue and helps you get a good answer. – Col Bates - collynomial Apr 21 '22 at 16:42

0 Answers0