
I am trying to read a Delta-format table from Azure Blob Storage using the code below, in a Jupyter notebook running on a MapR cluster.

When I run this code it throws java.lang.NoSuchMethodException: org.apache.hadoop.fs.azure.NativeAzureFileSystem.<init>(java.net.URI, org.apache.hadoop.conf.Configuration)

%%configure -f
{
  "conf": {
    "spark.jars.packages": "io.delta:delta-core_2.11:0.5.0,org.apache.hadoop:hadoop-azure:3.3.1"
  },
  "kind": "spark",
  "driverMemory": "4g",
  "executorMemory": "2g",
  "executorCores": 3,
  "numExecutors": 4
}

val storage_account_name_dim = "storageAcc"
val sas_access_key_dim = "saskey"

// Tell Delta to use the Azure LogStore, map the wasbs:// scheme, and
// authenticate against the "dsr" container with the SAS key
spark.conf.set("spark.delta.logStore.class", "org.apache.spark.sql.delta.storage.AzureLogStore")
spark.conf.set("fs.AbstractFileSystem.wasbs.impl", "org.apache.hadoop.fs.azure.NativeAzureFileSystem")
spark.conf.set(s"fs.azure.sas.dsr.$storage_account_name_dim.blob.core.windows.net", sas_access_key_dim)

val refdf = spark.read.format("delta").load("wasbs://dsr@storageAcc.blob.core.windows.net/dim/ds/DS_CUSTOM_WAG_VENDOR_UPC")
refdf.show(1)

The full stack trace:

java.lang.RuntimeException: java.lang.NoSuchMethodException: org.apache.hadoop.fs.azure.NativeAzureFileSystem.<init>(java.net.URI, org.apache.hadoop.conf.Configuration)
  at org.apache.hadoop.fs.AbstractFileSystem.newInstance(AbstractFileSystem.java:135)
  at org.apache.hadoop.fs.AbstractFileSystem.createFileSystem(AbstractFileSystem.java:164)
  at org.apache.hadoop.fs.AbstractFileSystem.get(AbstractFileSystem.java:249)
  at org.apache.hadoop.fs.FileContext$2.run(FileContext.java:334)
  at org.apache.hadoop.fs.FileContext$2.run(FileContext.java:331)
  at java.security.AccessController.doPrivileged(Native Method)
  at javax.security.auth.Subject.doAs(Subject.java:422)
  at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1669)
  at org.apache.hadoop.fs.FileContext.getAbstractFileSystem(FileContext.java:331)
  at org.apache.hadoop.fs.FileContext.getFileContext(FileContext.java:448)
  at org.apache.spark.sql.delta.storage.HDFSLogStore.getFileContext(HDFSLogStore.scala:53)
  at org.apache.spark.sql.delta.storage.HDFSLogStore.listFrom(HDFSLogStore.scala:137)
  at org.apache.spark.sql.delta.Checkpoints$class.findLastCompleteCheckpoint(Checkpoints.scala:174)
  at org.apache.spark.sql.delta.DeltaLog.findLastCompleteCheckpoint(DeltaLog.scala:58)
  at org.apache.spark.sql.delta.Checkpoints$class.loadMetadataFromFile(Checkpoints.scala:156)
  at org.apache.spark.sql.delta.Checkpoints$class.loadMetadataFromFile(Checkpoints.scala:150)
  at org.apache.spark.sql.delta.Checkpoints$class.loadMetadataFromFile(Checkpoints.scala:150)
  at org.apache.spark.sql.delta.Checkpoints$class.loadMetadataFromFile(Checkpoints.scala:150)
  at org.apache.spark.sql.delta.Checkpoints$class.lastCheckpoint(Checkpoints.scala:133)
  at org.apache.spark.sql.delta.DeltaLog.lastCheckpoint(DeltaLog.scala:58)
  at org.apache.spark.sql.delta.DeltaLog.<init>(DeltaLog.scala:139)
  at org.apache.spark.sql.delta.DeltaLog$$anon$3$$anonfun$call$1$$anonfun$apply$10.apply(DeltaLog.scala:744)
  at org.apache.spark.sql.delta.DeltaLog$$anon$3$$anonfun$call$1$$anonfun$apply$10.apply(DeltaLog.scala:744)
  at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:194)
  at org.apache.spark.sql.delta.DeltaLog$$anon$3$$anonfun$call$1.apply(DeltaLog.scala:743)
  at org.apache.spark.sql.delta.DeltaLog$$anon$3$$anonfun$call$1.apply(DeltaLog.scala:743)
  at com.databricks.spark.util.DatabricksLogging$class.recordOperation(DatabricksLogging.scala:77)
  at org.apache.spark.sql.delta.DeltaLog$.recordOperation(DeltaLog.scala:671)
  at org.apache.spark.sql.delta.metering.DeltaLogging$class.recordDeltaOperation(DeltaLogging.scala:103)
  at org.apache.spark.sql.delta.DeltaLog$.recordDeltaOperation(DeltaLog.scala:671)
  at org.apache.spark.sql.delta.DeltaLog$$anon$3.call(DeltaLog.scala:742)
  at org.apache.spark.sql.delta.DeltaLog$$anon$3.call(DeltaLog.scala:740)
  at com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4792)
  at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
  at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
  at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
  at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2257)
  at com.google.common.cache.LocalCache.get(LocalCache.java:4000)
  at com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4789)
  at org.apache.spark.sql.delta.DeltaLog$.apply(DeltaLog.scala:740)
  at org.apache.spark.sql.delta.DeltaLog$.forTable(DeltaLog.scala:712)
  at org.apache.spark.sql.delta.sources.DeltaDataSource.createRelation(DeltaDataSource.scala:169)
  at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:318)
  at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:178)
  ... 50 elided
Caused by: java.lang.NoSuchMethodException: org.apache.hadoop.fs.azure.NativeAzureFileSystem.<init>(java.net.URI, org.apache.hadoop.conf.Configuration)
  at java.lang.Class.getConstructor0(Class.java:3082)
  at java.lang.Class.getDeclaredConstructor(Class.java:2178)
  at org.apache.hadoop.fs.AbstractFileSystem.newInstance(AbstractFileSystem.java:129)
  ... 95 more

The same code works fine for Parquet files in the same container. For example, a plain Parquet read with the same session and SAS configuration succeeds (a minimal sketch; the Parquet path below is illustrative, not from my actual data):
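// Hypothetical Parquet path under the same "dsr" container; this reads
// without error using the exact same session and SAS configuration.
val pqdf = spark.read.parquet("wasbs://dsr@storageAcc.blob.core.windows.net/dim/ds/SOME_PARQUET_TABLE")
pqdf.show(1)

Any help would be much appreciated.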

Shalaj

1 Answer


I resolved it by moving spark.delta.logStore.class into the conf parameter of %%configure, so it is set when the Spark session is created. Setting it with spark.conf.set after the session has started does not take effect, which is why the stack trace shows Delta still going through HDFSLogStore. HDFSLogStore uses the FileContext API, and FileContext cannot instantiate NativeAzureFileSystem (it is a FileSystem, not an AbstractFileSystem, hence the missing (java.net.URI, org.apache.hadoop.conf.Configuration) constructor); AzureLogStore uses the plain FileSystem API and avoids the problem.

{ "conf": {"spark.jars.packages": "io.delta:delta-core_2.11:0.5.0,org.apache.hadoop:hadoop-azure:3.3.1","spark.delta.logStore.class":"org.apache.spark.sql.delta.storage.AzureLogStore"}, "kind": "spark","driverMemory" : "4g", "executorMemory": "2g", "executorCores": 3, "numExecutors":4}

Shalaj