// Project the leading elements of the array column `data` into separate
// columns named c0, c1, c2, c3 (element i becomes column "c<i>").
// Widen the index range to pull out further elements.
val resDF = temp_df.select(
  (0 to 3).map(idx => 'data.getItem(idx).alias(s"c$idx")): _*
)
// Print every cell in full (no truncation of long values).
resDF.show(truncate = false)
// +------------------+------------------------------------+---+----------+
// |c0 |c1 |c2 |c3 |
// +------------------+------------------------------------+---+----------+
// |row-r9pv-p86t.ifsp|00000000-0000-0000-0838-60C2FFCC43AE|0 |1574264158|
// |row-7v2v~88z5-44se|00000000-0000-0000-C8FC-DDD3F9A72DFF|0 |1574264158|
// |row-hzc9-4kvv~mbc9|00000000-0000-0000-562E-D9A0792557FC|0 |1574264158|
// +------------------+------------------------------------+---+----------+
// Version 2 (using withColumn and concat_ws):
// Sample input: a single-column DataFrame ("dataColumn") whose cells are
// arrays of 13 strings. The three rows differ only at positions 0, 1, 10
// and 12; the remaining positions hold the same literal values in every row.
val sourceDF = {
  // Builds one 13-element record from the four positions that vary.
  def record(c0: String, c1: String, c10: String, c12: String): Array[String] =
    Array(c0, c1, "0", "1574264158", "", "1574264158", "", "{ }", "2007", "ZOEY", c10, "F", c12)

  Seq(
    record("row-r9pv-p86t.ifsp", "00000000-0000-0000-0838-60C2FFCC43AE", "KINGS", "11"),
    record("row-7v2v~88z5-44se", "00000000-0000-0000-C8FC-DDD3F9A72DFF", "SUFFOLK", "6"),
    record("row-hzc9-4kvv~mbc9", "00000000-0000-0000-562E-D9A0792557FC", "MONROE", "6")
  ).toDF("dataColumn")
}
// Print every cell in full (no truncation of long values).
sourceDF.show(truncate = false)
// +-------------------------------------------------------------------------------------------------------------------------+
// |dataColumn |
// +-------------------------------------------------------------------------------------------------------------------------+
// |[row-r9pv-p86t.ifsp, 00000000-0000-0000-0838-60C2FFCC43AE, 0, 1574264158, , 1574264158, , { }, 2007, ZOEY, KINGS, F, 11] |
// |[row-7v2v~88z5-44se, 00000000-0000-0000-C8FC-DDD3F9A72DFF, 0, 1574264158, , 1574264158, , { }, 2007, ZOEY, SUFFOLK, F, 6]|
// |[row-hzc9-4kvv~mbc9, 00000000-0000-0000-562E-D9A0792557FC, 0, 1574264158, , 1574264158, , { }, 2007, ZOEY, MONROE, F, 6] |
// +-------------------------------------------------------------------------------------------------------------------------+
// Flatten each array in `dataColumn` into one string, joining the elements
// with ", ", and keep only that string as the column `dataString`.
val df1 = sourceDF.select(
  concat_ws(", ", 'dataColumn).alias("dataString")
)
df1.printSchema()
df1.show(truncate = false)
// root
// |-- dataString: string (nullable = false)
//
// +-----------------------------------------------------------------------------------------------------------------------+
// |dataString |
// +-----------------------------------------------------------------------------------------------------------------------+
// |row-r9pv-p86t.ifsp, 00000000-0000-0000-0838-60C2FFCC43AE, 0, 1574264158, , 1574264158, , { }, 2007, ZOEY, KINGS, F, 11 |
// |row-7v2v~88z5-44se, 00000000-0000-0000-C8FC-DDD3F9A72DFF, 0, 1574264158, , 1574264158, , { }, 2007, ZOEY, SUFFOLK, F, 6|
// |row-hzc9-4kvv~mbc9, 00000000-0000-0000-562E-D9A0792557FC, 0, 1574264158, , 1574264158, , { }, 2007, ZOEY, MONROE, F, 6 |
// +-----------------------------------------------------------------------------------------------------------------------+
// Split `dataString` back apart on ", " and expose the first 13 pieces as
// individual string columns c0 .. c12 (piece i becomes column "c<i>").
val df2 = {
  // The split expression is the same for every output column, so build it once.
  val pieces = split('dataString, ", ")
  df1.select(
    (0 to 12).map(idx => pieces.getItem(idx).alias(s"c$idx")): _*
  )
}
df2.printSchema()
// root
// |-- c0: string (nullable = true)
// |-- c1: string (nullable = true)
// |-- c2: string (nullable = true)
// |-- c3: string (nullable = true)
// |-- c4: string (nullable = true)
// |-- c5: string (nullable = true)
// |-- c6: string (nullable = true)
// |-- c7: string (nullable = true)
// |-- c8: string (nullable = true)
// |-- c9: string (nullable = true)
// |-- c10: string (nullable = true)
// |-- c11: string (nullable = true)
// |-- c12: string (nullable = true)
// Print the split-out columns with every cell shown in full (no truncation).
df2.show(truncate = false)
// +------------------+------------------------------------+---+----------+---+----------+---+---+----+----+-------+---+---+
// |c0 |c1 |c2 |c3 |c4 |c5 |c6 |c7 |c8 |c9 |c10 |c11|c12|
// +------------------+------------------------------------+---+----------+---+----------+---+---+----+----+-------+---+---+
// |row-r9pv-p86t.ifsp|00000000-0000-0000-0838-60C2FFCC43AE|0 |1574264158| |1574264158| |{ }|2007|ZOEY|KINGS |F |11 |
// |row-7v2v~88z5-44se|00000000-0000-0000-C8FC-DDD3F9A72DFF|0 |1574264158| |1574264158| |{ }|2007|ZOEY|SUFFOLK|F |6 |
// |row-hzc9-4kvv~mbc9|00000000-0000-0000-562E-D9A0792557FC|0 |1574264158| |1574264158| |{ }|2007|ZOEY|MONROE |F |6 |
// +------------------+------------------------------------+---+----------+---+----------+---+---+----+----+-------+---+---+