package Product
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType, TimestampType}
object ProductUser {

  /**
   * Joins user sessions with product purchases, aggregates spend per user,
   * then creates a Hive EXTERNAL table backed by a local directory (no HDFS)
   * and loads the user-session CSV into it. Intended to run in local mode
   * from IntelliJ IDEA.
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .appName("Spark_Bigdata")
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()

    val csvUser = "C:\\Users\\QTR1KOR\\Documents\\UserSession.csv"
    val csvProduct = "C:\\Users\\QTR1KOR\\Documents\\ProductPurchase.csv"

    // Schema for UserSession.csv
    val user_schema = StructType(List(
      StructField("user_id", IntegerType, false),
      StructField("name", StringType, false),
      StructField("Time", TimestampType, false),
      StructField("location", StringType, false)))

    // Schema for ProductPurchase.csv
    val product_schema = StructType(List(
      StructField("Id", IntegerType, false),
      StructField("Product_id", IntegerType, false),
      StructField("Product_name", StringType, false),
      StructField("Price", IntegerType, false)))

    // DataFrames for user sessions and product purchases
    val userdf = spark.read.format("csv")
      .option("delimiter", ",")
      .option("header", "true")
      .schema(user_schema)
      .load(csvUser)
    val productdf = spark.read.format("csv")
      .option("delimiter", ",")
      .option("header", "true")
      .schema(product_schema)
      .load(csvProduct)

    // Inner join: one row per purchase, tagged with the purchaser's session info.
    // Column case matches the declared schema ("Price", not "price") — Spark
    // resolves names case-insensitively by default, but being consistent avoids
    // surprises if spark.sql.caseSensitive is ever enabled.
    val joined_data = userdf
      .join(productdf, userdf("user_id") === productdf("Id"), "inner")
      .selectExpr("user_id", "name", "Time", "Price")

    // Total spend per user. The original computed this but never used it;
    // show it so the aggregation is actually observable.
    val totalPerUser = joined_data.groupBy("user_id", "name").sum("Price")
    totalPerUser.show()

    // Database for the external table
    spark.sql("CREATE DATABASE IF NOT EXISTS Db")
    spark.catalog.listDatabases.show(false)

    // EXTERNAL table backed by a LOCAL directory (no HDFS). LOCATION must be a
    // valid directory URI: 'C://Users//...' is not one — use the file:/// scheme.
    // Point it at a dedicated directory, not a whole project folder, because
    // Hive treats every file under LOCATION as table data.
    spark.sql(
      """CREATE EXTERNAL TABLE IF NOT EXISTS Db.user_ex_table(
        |  Id Int, name String, Time Timestamp, location String)
        |ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
        |STORED AS Textfile
        |LOCATION 'file:///C:/Users/QTR1KOR/IdeaProjects/user_ex_table'""".stripMargin)

    // createOrReplaceTempView returns Unit — nothing useful to bind to a val.
    userdf.createOrReplaceTempView("user_Temp")
    //spark.sql("Insert overwrite table Db.user_ex_table partition (location) select * from user_Temp")

    // Copy the CSV into the table's directory. NOTE(review): LOAD DATA does not
    // skip the header row — either strip the header first or add
    // TBLPROPERTIES('skip.header.line.count'='1') to the CREATE TABLE above.
    spark.sql(
      "LOAD DATA LOCAL INPATH 'file:///C:/Users/QTR1KOR/Documents/UserSession.csv' " +
        "INTO TABLE Db.user_ex_table")

    // Show the loaded rows (the original showed the empty DDL result instead).
    spark.sql("SELECT * FROM Db.user_ex_table").show()

    spark.stop()
  }
} // This is my code
// NOTE: the log line
//   INFO Persistence: Request to load fields "comment,name,type" of class
//   org.apache.hadoop.hive.metastore.model.MFieldSchema but object is embedded, so ignored
// is informational DataNucleus/metastore noise, not an error — it can be silenced
// by raising the log level for DataNucleus in log4j configuration.
// Goal: suppress the message above and load the CSV data into the external table
// without HDFS (Spark local mode, run from the IntelliJ IDEA IDE).