Spark 3.4.0 Kafka streaming fails with Scala 2.12.12 at compile time and Scala 2.12.17 packaged with Spark at runtime
While migrating from Spark 2.2, I am unable to read Kafka streams and keep getting the error below. I am relatively new to this kind of code, so any help from experts would be appreciated:
java.lang.RuntimeException: error reading Scala signature of org.apache.spark.internal.Logging: unsafe symbol Level (child of package log4j) in runtime reflection universe
at scala.reflect.internal.pickling.UnPickler.unpickle(UnPickler.scala:51)
at scala.reflect.runtime.JavaMirrors$JavaMirror.unpickleClass(JavaMirrors.scala:660)
at scala.reflect.runtime.SymbolLoaders$TopClassCompleter.$anonfun$complete$2(SymbolLoaders.scala:37)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at scala.reflect.internal.SymbolTable.slowButSafeEnteringPhaseNotLaterThan(SymbolTable.scala:333)
at scala.reflect.runtime.SymbolLoaders$TopClassCompleter.complete(SymbolLoaders.scala:34)
at scala.reflect.internal.Symbols$Symbol.completeInfo(Symbols.scala:1551)
at scala.reflect.internal.Symbols$Symbol.info(Symbols.scala:1514)
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol$$anon$7.scala$reflect$runtime$SynchronizedSymbols$SynchronizedSymbol$$super$info(SynchronizedSymbols.scala:203)
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.$anonfun$info$1(SynchronizedSymbols.scala:158)
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.info(SynchronizedSymbols.scala:149)
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.info$(SynchronizedSymbols.scala:158)
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol$$anon$7.info(SynchronizedSymbols.scala:203)
at scala.reflect.internal.Types$TypeRef.baseClasses(Types.scala:2280)
at scala.reflect.internal.Types.computeBaseClasses(Types.scala:1478)
at scala.reflect.internal.Types.computeBaseClasses$(Types.scala:1455)
at scala.reflect.internal.SymbolTable.computeBaseClasses(SymbolTable.scala:28)
at scala.reflect.internal.Types.$anonfun$defineBaseClassesOfCompoundType$2(Types.scala:1556)
at scala.reflect.internal.Types$CompoundType.memo(Types.scala:1426)
at scala.reflect.internal.Types.defineBaseClassesOfCompoundType(Types.scala:1556)
at scala.reflect.internal.Types.defineBaseClassesOfCompoundType$(Types.scala:1548)
at scala.reflect.runtime.JavaUniverse.scala$reflect$runtime$SynchronizedTypes$$super$defineBaseClassesOfCompoundType(JavaUniverse.scala:30)
at scala.reflect.runtime.SynchronizedTypes.$anonfun$defineBaseClassesOfCompoundType$1(SynchronizedTypes.scala:113)
at scala.reflect.runtime.SynchronizedTypes.defineBaseClassesOfCompoundType(SynchronizedTypes.scala:113)
at scala.reflect.runtime.SynchronizedTypes.defineBaseClassesOfCompoundType$(SynchronizedTypes.scala:112)
at scala.reflect.runtime.JavaUniverse.defineBaseClassesOfCompoundType(JavaUniverse.scala:30)
at scala.reflect.internal.Types$CompoundType.baseClasses(Types.scala:1409)
at scala.reflect.internal.Types$TypeRef.baseClasses(Types.scala:2280)
at scala.reflect.internal.Types.computeBaseClasses(Types.scala:1478)
at scala.reflect.internal.Types.computeBaseClasses$(Types.scala:1455)
at scala.reflect.internal.SymbolTable.computeBaseClasses(SymbolTable.scala:28)
at scala.reflect.internal.Types.$anonfun$defineBaseClassesOfCompoundType$2(Types.scala:1556)
at scala.reflect.internal.Types$CompoundType.memo(Types.scala:1426)
at scala.reflect.internal.Types.defineBaseClassesOfCompoundType(Types.scala:1556)
at scala.reflect.internal.Types.defineBaseClassesOfCompoundType$(Types.scala:1548)
at scala.reflect.runtime.JavaUniverse.scala$reflect$runtime$SynchronizedTypes$$super$defineBaseClassesOfCompoundType(JavaUniverse.scala:30)
at scala.reflect.runtime.SynchronizedTypes.$anonfun$defineBaseClassesOfCompoundType$1(SynchronizedTypes.scala:113)
at scala.reflect.runtime.SynchronizedTypes.defineBaseClassesOfCompoundType(SynchronizedTypes.scala:113)
at scala.reflect.runtime.SynchronizedTypes.defineBaseClassesOfCompoundType$(SynchronizedTypes.scala:112)
at scala.reflect.runtime.JavaUniverse.defineBaseClassesOfCompoundType(JavaUniverse.scala:30)
at scala.reflect.internal.Types$CompoundType.baseClasses(Types.scala:1409)
at scala.reflect.internal.Types$Type.findDecl(Types.scala:1010)
at scala.reflect.internal.Types$Type.decl(Types.scala:597)
at scala.reflect.internal.ReificationSupport$ReificationSupportImpl.selectOverloadedMethod(ReificationSupport.scala:43)
at scala.reflect.internal.ReificationSupport$ReificationSupportImpl.selectOverloadedMethod(ReificationSupport.scala:23)
at org.apache.spark.sql.catalyst.ScalaReflection$$typecreator9$1.apply(ScalaReflection.scala:862)
at scala.reflect.api.TypeTags$WeakTypeTagImpl.tpe$lzycompute(TypeTags.scala:237)
at scala.reflect.api.TypeTags$WeakTypeTagImpl.tpe(TypeTags.scala:237)
at org.apache.spark.sql.catalyst.ScalaReflection.localTypeOf(ScalaReflection.scala:999)
at org.apache.spark.sql.catalyst.ScalaReflection.localTypeOf$(ScalaReflection.scala:997)
at org.apache.spark.sql.catalyst.ScalaReflection$.localTypeOf(ScalaReflection.scala:60)
at org.apache.spark.sql.catalyst.ScalaReflection$.encoderFor(ScalaReflection.scala:862)
at org.apache.spark.sql.catalyst.ScalaReflection$.$anonfun$encoderFor$1(ScalaReflection.scala:814)
at scala.reflect.internal.tpe.TypeConstraints$UndoLog.undo(TypeConstraints.scala:73)
at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects(ScalaReflection.scala:984)
at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects$(ScalaReflection.scala:983)
at org.apache.spark.sql.catalyst.ScalaReflection$.cleanUpReflectionObjects(ScalaReflection.scala:60)
at org.apache.spark.sql.catalyst.ScalaReflection$.encoderFor(ScalaReflection.scala:811)
at org.apache.spark.sql.catalyst.ScalaReflection$.encoderFor(ScalaReflection.scala:805)
at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$.apply(ExpressionEncoder.scala:50)
at org.apache.spark.sql.Encoders$.product(Encoders.scala:308)
at org.apache.spark.sql.LowPrioritySQLImplicits.newProductEncoder(SQLImplicits.scala:264)
at org.apache.spark.sql.LowPrioritySQLImplicits.newProductEncoder$(SQLImplicits.scala:264)
at org.apache.spark.sql.SQLImplicits.newProductEncoder(SQLImplicits.scala:32)
at com.mycomosi.eaa.topology.ingestion.application.source.impl.ReadTopologyFromKafka.execute(ReadTopologyFromKafka.scala:40)
Following is the code I am running to read Kafka messages with Spark 3.4.0:
import java.sql.Timestamp
import com.fasterxml.jackson.core.`type`.TypeReference
import com.fasterxml.jackson.databind.ObjectMapper
import com.mycomosi.eaa.alarm.propagation.application.monitoring.MbeanLoggingScheduler
import com.mycomosi.eaa.common.application.logging.Logging
import com.mycomosi.eaa.common.application.monitoring.ClientType
import com.mycomosi.eaa.topology.ingestion.application.logging.SkewMetrics
import com.mycomosi.eaa.topology.ingestion.application.processing.model.TopologyEventSto
import com.mycomosi.eaa.topology.ingestion.application.settings.TCMSettings
import com.mycomosi.eaa.topology.ingestion.application.source.ReadTopology
import com.mycomosi.eaa.topology.ingestion.interfaces.kafka.Converter._
import com.mycomosi.unit.api.notifier.interfaces.v1.SubscriberNotificationDto
import com.mycomosi.unit.api.onboarding.interfaces.v1.catalog.TopologyTemplateDto
import com.mycomosi.unit.api.query.interfaces.v1.topologyinstance.TopologyInstanceDto
import com.mycomosi.unit.api.shared.crud.OperationType
import com.mycomosi.unit.api.shared.notification.EventType
import org.apache.spark.sql.{Dataset, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
class ReadTopologyFromKafka(implicit settings: TCMSettings, lagMetrics: SkewMetrics)
  extends ReadTopology with Serializable with Logging {

  @transient lazy val logger: Logger = LoggerFactory.getLogger(getClass.getCanonicalName)

  def execute(spark: SparkSession): Dataset[TopologyEventSto] = {
    import spark.implicits._
    val kafkaStream = spark
      .readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", settings.kafka.brokers)
      .option("kafka.max.partition.fetch.bytes", settings.kafka.maxRequestSize)
      .option("startingOffsets", settings.kafka.startingOffsets)
      .option("maxOffsetsPerTrigger", settings.kafka.maxOffsetsPerTrigger.getOrElse(1000000L))
      .option("failOnDataLoss", "false")
      .option("subscribe", settings.kafka.topologyTopic)
      .load()
  }
}
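The snippet is trimmed at .load(): the declared return type is Dataset[TopologyEventSto], and the newProductEncoder frame in the stack trace points at ReadTopologyFromKafka.scala:40, so the tail of execute() presumably deserializes the Kafka value column into the case class. A minimal sketch of that tail (not the actual code; toTopologyEventSto is a hypothetical stand-in for the parsing done via the imported Converter._ helpers):

// Hypothetical continuation inside execute(), with spark.implicits._ in scope.
// The implicit product encoder derived for TopologyEventSto in .map is what
// walks Scala runtime reflection and surfaces the "unsafe symbol Level" error.
kafkaStream
  .selectExpr("CAST(value AS STRING) AS json")
  .as[String]                            // primitive encoder from spark.implicits._
  .map(json => toTopologyEventSto(json)) // derives Encoder[TopologyEventSto] implicitly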
UPDATE: I was able to move past this issue. I noticed that the Docker image build had been removing log4j-*.jar, a step that made sense with Spark 2.2.0 but kept running after the upgrade to Spark 3.4.0. When I changed
RUN rm ${SPARK_HOME}/jars/log4j-*
To
RUN rm ${SPARK_HOME}/jars/log4j-slf4j2-impl* && rm ${SPARK_HOME}/jars/log4j-1.2-api-*
the issue was fixed. I still don't know what this changed internally or why it fixed the error, but if someone can share any knowledge, I would really appreciate it.
My motivation for deleting only the jars above was that this code ran fine for me in IntelliJ, so I checked which of Spark's log4j jars were not needed and removed only those.
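My best guess (unverified) is that the broad rm log4j-* was also deleting log4j-api, the jar that provides org.apache.logging.log4j.Level, i.e. the "unsafe symbol Level (child of package log4j)" that the reflection error complains about, so keeping log4j-api and log4j-core in ${SPARK_HOME}/jars would explain the fix. A quick sanity check that can be pasted into spark-shell inside the image (the check itself is generic; the diagnosis is an assumption):

// Checks whether the Log4j 2 classes that Spark 3.x logging depends on are present.
// If log4j-api is missing from ${SPARK_HOME}/jars, the first lookup fails, and Scala
// runtime reflection then cannot unpickle org.apache.spark.internal.Logging either.
def hasClass(name: String): Boolean =
  try { Class.forName(name); true } catch { case _: ClassNotFoundException => false }

println(s"log4j-api  present: ${hasClass("org.apache.logging.log4j.Level")}")
println(s"log4j-core present: ${hasClass("org.apache.logging.log4j.core.Logger")}")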