
I'm trying to do a left outer join below. The code performs the join using only one column, 'ID'. Can you please help me alter the code so the join condition also includes two more columns, 'date' and 'location'? Thank you.

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SparkSession
// build (or reuse) a local SparkSession with Hive support
implicit val spark: SparkSession =
  SparkSession.builder.master("local").appName("testing").enableHiveSupport().getOrCreate()
import spark.implicits._



def getRelevantSegmentInfo(tableName: String, segmentName: String, pocs: Seq[String])(implicit
    spark: SparkSession
): DataFrame = {
  spark
    .table(tableName)
    .select(segmentName, "ID")
}

val firstDF: DataFrame = getRelevantSegmentInfo(
  result_as_sequence.head.tableName,
  result_as_sequence.head.segmentName,
  result_as_sequence.head.pocs
)(spark)

val finalDF = result_as_sequence.tail.foldLeft(firstDF) {
  case (leftDF, segmentStruct) =>
    leftDF.join(
      getRelevantSegmentInfo(
        segmentStruct.tableName,
        segmentStruct.segmentName,
        segmentStruct.pocs
      )(spark),
      Seq("ID"),
      "left_outer"
    )
}
  • What did you try? What's not working? – Gaël J Sep 07 '22 at 19:12
  • Does this answer your question? [Spark specify multiple column conditions for dataframe join](https://stackoverflow.com/questions/31240148/spark-specify-multiple-column-conditions-for-dataframe-join) – Gaël J Sep 07 '22 at 19:13
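
Along the lines of the linked question, DataFrame.join accepts a Seq of column names, so the fold can key on all three columns at once. Here is a minimal sketch of how that might look against the code above, assuming every table read by getRelevantSegmentInfo really does contain 'date' and 'location' columns (the select also has to carry them through):

// Select the extra key columns so they are available at join time.
def getRelevantSegmentInfo(tableName: String, segmentName: String, pocs: Seq[String])(implicit
    spark: SparkSession
): DataFrame = {
  spark
    .table(tableName)
    .select(segmentName, "ID", "date", "location")
}

// Join on all three columns; passing a Seq of names produces an equi-join
// that keeps a single copy of each key column in the result.
val finalDF = result_as_sequence.tail.foldLeft(firstDF) {
  case (leftDF, segmentStruct) =>
    leftDF.join(
      getRelevantSegmentInfo(
        segmentStruct.tableName,
        segmentStruct.segmentName,
        segmentStruct.pocs
      )(spark),
      Seq("ID", "date", "location"),
      "left_outer"
    )
}

Using the Seq form rather than an explicit expression such as leftDF("ID") === rightDF("ID") avoids duplicate key columns in the output, which matters when the result is folded into the next join.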
