I'm trying to write a table in Delta format on my local machine, following the code from the Delta documentation.
import pyspark
from delta import configure_spark_with_delta_pip

# Session builder with the Delta SQL extension and catalog, as in the Delta docs.
builder = pyspark.sql.SparkSession.builder.appName("MyApp") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")

# Raw string, so "\t" in the path is not interpreted as a tab character.
stored_path = r"E:\tmp\delta.123"

spark = configure_spark_with_delta_pip(builder).getOrCreate()

# Write a small range of integers as a Delta table.
data = spark.range(0, 5)
data.write.format("delta").save(stored_path)
print("============================================================================================")
print("Finish")
print("============================================================================================")
But I get this error:
Traceback (most recent call last):
File "e:\tests\test_write_delta.py", line 14, in <module>
data.write.format("delta").save(stored_path)
File "C:\Users\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pyspark\sql\readwriter.py", line 968, in save
self._jwrite.save(path)
File "C:\Users\AppData\Local\Programs\Python\Python38-32\lib\site-packages\py4j\java_gateway.py", line 1321, in __call__
return_value = get_return_value(
File "C:\Users\AppData\Local\Programs\Python\Python38-32\lib\site-packages\pyspark\sql\utils.py", line 190, in deco
return f(*a, **kw)
File "C:\Users\AppData\Local\Programs\Python\Python38-32\lib\site-packages\py4j\protocol.py", line 326, in get_return_value
raise Py4JJavaError(
py4j.protocol.Py4JJavaError: An error occurred while calling o37.save.
: com.google.common.util.concurrent.ExecutionError: java.lang.UnsatisfiedLinkError: 'boolean
org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(java.lang.String, int)'
at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2261)
at com.google.common.cache.LocalCache.get(LocalCache.java:4000)
at com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4789)
at org.apache.spark.sql.delta.DeltaLog$.getDeltaLogFromCache$1(DeltaLog.scala:604)
at org.apache.spark.sql.delta.DeltaLog$.apply(DeltaLog.scala:611)
I tried both delta-spark versions 2.0.0 and 2.1.0. What's missing here, or what is the correct way to run Delta in a local project?