I am trying to read delta format file from azure storage using code below in jupyter notebook which is running in mapr cluster.
when i am running this code it is throwing issue that java.lang.NoSuchMethodException: org.apache.hadoop.fs.azure.NativeAzureFileSystem.(java.net.URI, org.apache.hadoop.conf.Configuration)
%%configure -f
{ "conf": {"spark.jars.packages": "io.delta:delta-core_2.11:0.5.0,org.apache.hadoop:hadoop-azure:3.3.1"}, "kind": "spark",
"driverMemory" : "4g", "executorMemory": "2g", "executorCores": 3, "numExecutors":4}
val storage_account_name_dim = "storageAcc"
val sas_access_key_dim = "saskey"
spark.conf.set("spark.delta.logStore.class","org.apache.spark.sql.delta.storage.AzureLogStore")
spark.conf.set("fs.AbstractFileSystem.wasbs.impl", "org.apache.hadoop.fs.azure.NativeAzureFileSystem")
spark.conf.set(s"fs.azure.sas.dsr.$storage_account_name_dim.blob.core.windows.net", sas_access_key_dim)
val refdf = spark.read.format("delta").load("wasbs://dsr@storageAcc.blob.core.windows.net/dim/ds/DS_CUSTOM_WAG_VENDOR_UPC")
refdf.show(1)
it is giving me below issue
java.lang.RuntimeException: java.lang.NoSuchMethodException: org.apache.hadoop.fs.azure.NativeAzureFileSystem.<init>(java.net.URI, org.apache.hadoop.conf.Configuration)
at org.apache.hadoop.fs.AbstractFileSystem.newInstance(AbstractFileSystem.java:135)
at org.apache.hadoop.fs.AbstractFileSystem.createFileSystem(AbstractFileSystem.java:164)
at org.apache.hadoop.fs.AbstractFileSystem.get(AbstractFileSystem.java:249)
at org.apache.hadoop.fs.FileContext$2.run(FileContext.java:334)
at org.apache.hadoop.fs.FileContext$2.run(FileContext.java:331)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1669)
at org.apache.hadoop.fs.FileContext.getAbstractFileSystem(FileContext.java:331)
at org.apache.hadoop.fs.FileContext.getFileContext(FileContext.java:448)
at org.apache.spark.sql.delta.storage.HDFSLogStore.getFileContext(HDFSLogStore.scala:53)
at org.apache.spark.sql.delta.storage.HDFSLogStore.listFrom(HDFSLogStore.scala:137)
at org.apache.spark.sql.delta.Checkpoints$class.findLastCompleteCheckpoint(Checkpoints.scala:174)
at org.apache.spark.sql.delta.DeltaLog.findLastCompleteCheckpoint(DeltaLog.scala:58)
at org.apache.spark.sql.delta.Checkpoints$class.loadMetadataFromFile(Checkpoints.scala:156)
at org.apache.spark.sql.delta.Checkpoints$class.loadMetadataFromFile(Checkpoints.scala:150)
at org.apache.spark.sql.delta.Checkpoints$class.loadMetadataFromFile(Checkpoints.scala:150)
at org.apache.spark.sql.delta.Checkpoints$class.loadMetadataFromFile(Checkpoints.scala:150)
at org.apache.spark.sql.delta.Checkpoints$class.lastCheckpoint(Checkpoints.scala:133)
at org.apache.spark.sql.delta.DeltaLog.lastCheckpoint(DeltaLog.scala:58)
at org.apache.spark.sql.delta.DeltaLog.<init>(DeltaLog.scala:139)
at org.apache.spark.sql.delta.DeltaLog$$anon$3$$anonfun$call$1$$anonfun$apply$10.apply(DeltaLog.scala:744)
at org.apache.spark.sql.delta.DeltaLog$$anon$3$$anonfun$call$1$$anonfun$apply$10.apply(DeltaLog.scala:744)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
at org.apache.spark.sql.delta.DeltaLog$$anon$3$$anonfun$call$1.apply(DeltaLog.scala:743)
at org.apache.spark.sql.delta.DeltaLog$$anon$3$$anonfun$call$1.apply(DeltaLog.scala:743)
at com.databricks.spark.util.DatabricksLogging$class.recordOperation(DatabricksLogging.scala:77)
at org.apache.spark.sql.delta.DeltaLog$.recordOperation(DeltaLog.scala:671)
at org.apache.spark.sql.delta.metering.DeltaLogging$class.recordDeltaOperation(DeltaLogging.scala:103)
at org.apache.spark.sql.delta.DeltaLog$.recordDeltaOperation(DeltaLog.scala:671)
at org.apache.spark.sql.delta.DeltaLog$$anon$3.call(DeltaLog.scala:742)
at org.apache.spark.sql.delta.DeltaLog$$anon$3.call(DeltaLog.scala:740)
at com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4792)
at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2257)
at com.google.common.cache.LocalCache.get(LocalCache.java:4000)
at com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4789)
at org.apache.spark.sql.delta.DeltaLog$.apply(DeltaLog.scala:740)
at org.apache.spark.sql.delta.DeltaLog$.forTable(DeltaLog.scala:712)
at org.apache.spark.sql.delta.sources.DeltaDataSource.createRelation(DeltaDataSource.scala:169)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:318)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:178)
... 50 elided
Caused by: java.lang.NoSuchMethodException: org.apache.hadoop.fs.azure.NativeAzureFileSystem.<init>(java.net.URI, org.apache.hadoop.conf.Configuration)
at java.lang.Class.getConstructor0(Class.java:3082)
at java.lang.Class.getDeclaredConstructor(Class.java:2178)
at org.apache.hadoop.fs.AbstractFileSystem.newInstance(AbstractFileSystem.java:129)
... 95 more
same code is working for parquet file , any help will be much appreciated