I am running a Databricks job that uses `ProtoSQL.udf`. The DataFrame has a column that contains an encoded `protoValue`. The `parse` method below is used to create a new column that stores the JSON extracted from `protoValue`.
scalaVersion = 2.12 scalapb.version = 0.11.8
I am using the following dependencies:
implementation "com.thesamet.scalapb:sparksql32-scalapb0_11_2.12:1.0.0-M1"
implementation "com.thesamet.scalapb:scalapb-json4s_2.12:0.11.0"
implementation "com.thesamet.scalapb:scalapb-runtime_2.12:0.11.8"
I am getting the following error at runtime, at the call to `ProtoSQL.udf`:
java.lang.VerifyError: class frameless.functions.Spark2_4_LambdaVariable overrides final method genCode.(Lorg/apache/spark/sql/catalyst/expressions/codegen/CodegenContext;)Lorg/apache/spark/sql/catalyst/expressions/codegen/ExprCode;
Stacktrace
OpenJDK 64-Bit Server VM warning: ignoring option MaxPermSize=512m; support was removed in 8.0 22/02/25 11:00:28 ERROR Uncaught throwable from user code: java.lang.VerifyError: class frameless.functions.Spark2_4_LambdaVariable overrides final method genCode.(Lorg/apache/spark/sql/catalyst/expressions/codegen/CodegenContext;)Lorg/apache/spark/sql/catalyst/expressions/codegen/ExprCode; at java.lang.ClassLoader.defineClass1(Native Method) at java.lang.ClassLoader.defineClass(ClassLoader.java:757) at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142) at java.net.URLClassLoader.defineClass(URLClassLoader.java:468) at java.net.URLClassLoader.access$100(URLClassLoader.java:74) at java.net.URLClassLoader$1.run(URLClassLoader.java:369) at java.net.URLClassLoader$1.run(URLClassLoader.java:363) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:362) at java.lang.ClassLoader.loadClass(ClassLoader.java:419) at com.databricks.backend.daemon.driver.ClassLoaders$LibraryClassLoader.loadClass(ClassLoaders.scala:151) at java.lang.ClassLoader.loadClass(ClassLoader.java:352) at frameless.functions.FramelessUdf$.apply(Udf.scala:220) at scalapb.spark.Udfs.$anonfun$udf$1(Udfs.scala:12) at com.atlassian.plato.jobs.jira.reader.ProtoParser.parse(ProtoParser.scala:21)
/**
 * Adds a `value` column computed from the `payload.protoValue` column of `df`.
 *
 * Each `payload.protoValue` string is Base64-decoded and the resulting bytes are passed to
 * `entityCompanion.parseFrom`. The conversion is wrapped in a UDF built with `ProtoSQL.udf`.
 *
 * @param df              input frame; the code reads its `payload.protoValue` column
 * @param entityCompanion companion whose `parseFrom` turns the decoded bytes into a message
 * @return the result of `df.withColumn("value", ...)` using the UDF described above
 */
def parse(df: DataFrame)(implicit entityCompanion: EntityCompanion): DataFrame = {
  // NOTE(review): presumably resolved implicitly by ProtoSQL.udf for the UDF's result type — confirm.
  implicit val messageEncoder: typedEncoders.GeneratedMessageEncoder = GeneratedMessageEncoder.encoder()

  // Keep the UDF in a val before applying it to a column: ProtoSQL.udf is applied to the
  // function first, and only the resulting value is applied to the column expression.
  val decodeAndParse = ProtoSQL.udf { (encoded: String) =>
    val bytes = Base64.getDecoder.decode(encoded)
    entityCompanion.parseFrom(bytes)
  }

  df.withColumn("value", decodeAndParse(col("payload.protoValue")))
}
}
/** Helpers for obtaining the message companion that belongs to an entity type name. */
object EntityMapper {

  /** Companion of some generated message type; the concrete message type is left existential. */
  type EntityCompanion = GeneratedMessageCompanion[_ <: GeneratedMessage]

  /**
   * Looks up the entity for `entityType` via `EntityFactory.getEntity` and returns the
   * companion reported by that entity's `getCompanion()`.
   *
   * @param entityType name handed to `EntityFactory.getEntity`
   * @return the companion of the resolved entity
   */
  def getEntityCompanionObject(entityType: String): EntityCompanion = {
    val entity = EntityFactory.getEntity(entityType)
    entity.getCompanion()
  }
}