I'm trying to do a left outer join with the code below. It currently joins on only one column, 'ID'. Can you please help me alter the code so that the join condition also includes two more columns, 'date' and 'location'? Thank you. My best guess at the change is sketched after the code, but I'm not sure it is correct.
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SparkSession

// The session must be bound to a stable identifier `spark` for `import spark.implicits._` to compile
val spark: SparkSession =
  SparkSession.builder.master("local").appName("testing").enableHiveSupport().getOrCreate()
import spark.implicits._
// Pulls the segment column plus the join key "ID" from a Hive table.
// Note: the `pocs` parameter is currently unused in the body.
def getRelevantSegmentInfo(tableName: String, segmentName: String, pocs: Seq[String])(implicit
    spark: SparkSession
): DataFrame = {
  spark
    .table(tableName)
    .select(segmentName, "ID")
}
val firstDF: DataFrame = getRelevantSegmentInfo(
  result_as_sequence.head.tableName,
  result_as_sequence.head.segmentName,
  result_as_sequence.head.pocs
)(spark)
// Left-join each remaining segment onto the accumulated frame, keyed on "ID" only
val finalDF = result_as_sequence.tail.foldLeft(firstDF) { case (leftDF, segmentStruct) =>
  leftDF.join(
    getRelevantSegmentInfo(
      segmentStruct.tableName,
      segmentStruct.segmentName,
      segmentStruct.pocs
    )(spark),
    Seq("ID"),
    "left_outer"
  )
}
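
Here is my best guess at the change, a sketch only: I think the helper has to select 'date' and 'location' as well (the join columns have to exist on both sides of the join), and then all three names go into the join key sequence. This assumes every table in result_as_sequence actually has 'date' and 'location' columns, which is true in my case:

// Sketch only: assumes every table also exposes "date" and "location" columns
def getRelevantSegmentInfo(tableName: String, segmentName: String, pocs: Seq[String])(implicit
    spark: SparkSession
): DataFrame = {
  spark
    .table(tableName)
    // The join columns must be present on both sides, so select them here too
    .select(segmentName, "ID", "date", "location")
}

val finalDF = result_as_sequence.tail.foldLeft(firstDF) { case (leftDF, segmentStruct) =>
  leftDF.join(
    getRelevantSegmentInfo(
      segmentStruct.tableName,
      segmentStruct.segmentName,
      segmentStruct.pocs
    )(spark),
    // Passing multiple column names should make Spark join on equality of all of them
    Seq("ID", "date", "location"),
    "left_outer"
  )
}

If I understand the Seq-of-names join correctly, Spark keeps a single copy of ID, date, and location in the output instead of duplicating them from both sides, which is why I'd prefer this form over an explicit column-equality expression. Is this the right way to do it?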