from pyspark.sql import SparkSession, DataFrame
from IPython.display import display, HTML
from pyspark.sql.functions import first
from functools import reduce
display(HTML("<style>.container { width:100% !important; }</style>"))
spark = SparkSession \
.builder \
.appName("Python Spark SQL basic example") \
.config("spark.some.config.option") \
.getOrCreate()
# Build one pivoted, single-row dataframe per month (202002 .. 202009).
for i in range(2, 10):
    globals()['folders{}'.format(i)] = ["./result/20200" + str(i) + "/data1/*.csv"]
    print(globals()['folders{}'.format(i)])
    # Read all CSV files for the month; the files have no header row.
    globals()['df{}'.format(i)] = spark.read.option("header", "false").csv(globals()['folders{}'.format(i)])
    globals()['df{}'.format(i)].createOrReplaceTempView("iris")
    # Pivot: one column per distinct _c0 value, filled with the first _c7 value.
    globals()['concat{}'.format(i)] = globals()['df{}'.format(i)].groupBy().pivot("_c0").agg(first('_c7'))
    globals()['concat{}'.format(i)].show()
# This is the line that fails: unionAll is not defined anywhere above.
uni2_9 = unionAll(concat2, concat3, concat4, concat5, concat6, concat7, concat8, concat9)
uni2_9.show()
I want to combine these dataframes sequentially into one table. This code worked a while ago, but now I get an error on this line:

---> 30 uni2_9=unionAll([concat2, concat3, concat4,concat5,concat6,concat7,concat8,concat9])
NameError: name 'unionAll' is not defined

How do I use Spark's unionAll correctly?
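
For reference, unionAll is not a standalone function in PySpark: it exists only as a DataFrame method (DataFrame.unionAll, which current Spark versions treat as an alias for DataFrame.union), so the name was most likely a helper defined in another cell. A minimal sketch of such a helper, assuming all of the pivoted frames share the same schema, folds DataFrame.union over the frames using the functools.reduce and DataFrame already imported at the top:

def unionAll(*dfs):
    # Fold DataFrame.union pairwise over the frames; union matches
    # columns by position, so every frame needs the same schema.
    return reduce(DataFrame.union, dfs)

uni2_9 = unionAll(concat2, concat3, concat4, concat5, concat6, concat7, concat8, concat9)
uni2_9.show()

If the monthly pivots can yield different column sets (the distinct _c0 values may vary by month), DataFrame.unionByName with allowMissingColumns=True (Spark 3.1+) is the safer variant, since it matches columns by name rather than by position.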