I'm trying to create a web service with Spring Boot to do some data analysis with Spark, but Spark is a pain to set up and manage, with all the configuration and dependency management it requires.
I'm facing this error when I run the application:
Stack trace:
java.lang.IllegalStateException: Failed to execute CommandLineRunner
at org.springframework.boot.SpringApplication.callRunner(SpringApplication.java:780) [spring-boot-2.6.4.jar:2.6.4]
at org.springframework.boot.SpringApplication.callRunners(SpringApplication.java:761) [spring-boot-2.6.4.jar:2.6.4]
at org.springframework.boot.SpringApplication.run(SpringApplication.java:310) [spring-boot-2.6.4.jar:2.6.4]
at org.springframework.boot.SpringApplication.run(SpringApplication.java:1312) [spring-boot-2.6.4.jar:2.6.4]
at org.springframework.boot.SpringApplication.run(SpringApplication.java:1301) [spring-boot-2.6.4.jar:2.6.4]
at com.example.wuzzufdataanalysis.WuzzufDataAnalysisApplication.main(WuzzufDataAnalysisApplication.java:10) [classes/:na]
Caused by: java.lang.RuntimeException: error reading Scala signature of org.apache.spark.sql.package: unsafe symbol Unstable (child of package annotation) in runtime reflection universe
at scala.reflect.internal.pickling.UnPickler.unpickle(UnPickler.scala:51) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.JavaMirrors$JavaMirror.unpickleClass(JavaMirrors.scala:660) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SymbolLoaders$TopClassCompleter.$anonfun$complete$2(SymbolLoaders.scala:37) ~[scala-reflect-2.12.15.jar:na]
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) ~[scala-library-2.12.15.jar:na]
at scala.reflect.internal.SymbolTable.slowButSafeEnteringPhaseNotLaterThan(SymbolTable.scala:333) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SymbolLoaders$TopClassCompleter.complete(SymbolLoaders.scala:34) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Symbols$Symbol.completeInfo(Symbols.scala:1551) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Symbols$Symbol.info(Symbols.scala:1514) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol$$anon$13.scala$reflect$runtime$SynchronizedSymbols$SynchronizedSymbol$$super$info(SynchronizedSymbols.scala:221) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.$anonfun$info$1(SynchronizedSymbols.scala:158) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.info(SynchronizedSymbols.scala:149) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.info$(SynchronizedSymbols.scala:158) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol$$anon$13.info(SynchronizedSymbols.scala:221) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.SymbolTable.openPackageModule(SymbolTable.scala:356) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.SymbolTable.openPackageModule(SymbolTable.scala:411) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SymbolLoaders$LazyPackageType.$anonfun$complete$3(SymbolLoaders.scala:83) ~[scala-reflect-2.12.15.jar:na]
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) ~[scala-library-2.12.15.jar:na]
at scala.reflect.internal.SymbolTable.slowButSafeEnteringPhaseNotLaterThan(SymbolTable.scala:333) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SymbolLoaders$LazyPackageType.complete(SymbolLoaders.scala:80) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Symbols$Symbol.completeInfo(Symbols.scala:1551) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Symbols$Symbol.info(Symbols.scala:1514) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol$$anon$9.scala$reflect$runtime$SynchronizedSymbols$SynchronizedSymbol$$super$info(SynchronizedSymbols.scala:209) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.$anonfun$info$1(SynchronizedSymbols.scala:158) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.info(SynchronizedSymbols.scala:149) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.info$(SynchronizedSymbols.scala:158) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol$$anon$9.info(SynchronizedSymbols.scala:209) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Types$TypeRef.baseClasses(Types.scala:2280) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.tpe.FindMembers$FindMemberBase.init(FindMembers.scala:36) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.tpe.FindMembers$FindMember.init(FindMembers.scala:257) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Types$Type.$anonfun$findMember$1(Types.scala:1042) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Types$Type.findMemberInternal$1(Types.scala:1041) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Types$Type.findMember(Types.scala:1046) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Types$Type.memberBasedOnName(Types.scala:672) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Types$Type.member(Types.scala:636) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Mirrors$RootsBase.getModuleOrClass(Mirrors.scala:55) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Mirrors$RootsBase.staticClass(Mirrors.scala:51) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.internal.Mirrors$RootsBase.staticClass(Mirrors.scala:29) ~[scala-reflect-2.12.15.jar:na]
at org.apache.spark.sql.catalyst.ScalaReflection$$typecreator3$1.apply(ScalaReflection.scala:101) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at scala.reflect.api.TypeTags$WeakTypeTagImpl.tpe$lzycompute(TypeTags.scala:237) ~[scala-reflect-2.12.15.jar:na]
at scala.reflect.api.TypeTags$WeakTypeTagImpl.tpe(TypeTags.scala:237) ~[scala-reflect-2.12.15.jar:na]
at org.apache.spark.sql.catalyst.ScalaReflection.localTypeOf(ScalaReflection.scala:962) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.catalyst.ScalaReflection.localTypeOf$(ScalaReflection.scala:960) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.catalyst.ScalaReflection$.localTypeOf(ScalaReflection.scala:51) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.catalyst.ScalaReflection$.$anonfun$dataTypeFor$1(ScalaReflection.scala:101) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at scala.reflect.internal.tpe.TypeConstraints$UndoLog.undo(TypeConstraints.scala:73) ~[scala-reflect-2.12.15.jar:na]
at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects(ScalaReflection.scala:947) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects$(ScalaReflection.scala:946) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.catalyst.ScalaReflection$.cleanUpReflectionObjects(ScalaReflection.scala:51) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.catalyst.ScalaReflection$.dataTypeFor(ScalaReflection.scala:90) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.catalyst.ScalaReflection$.$anonfun$serializerForType$1(ScalaReflection.scala:435) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at scala.reflect.internal.tpe.TypeConstraints$UndoLog.undo(TypeConstraints.scala:73) ~[scala-reflect-2.12.15.jar:na]
at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects(ScalaReflection.scala:947) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects$(ScalaReflection.scala:946) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.catalyst.ScalaReflection$.cleanUpReflectionObjects(ScalaReflection.scala:51) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.catalyst.ScalaReflection$.serializerForType(ScalaReflection.scala:429) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$.apply(ExpressionEncoder.scala:55) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.Encoders$.STRING(Encoders.scala:93) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.execution.datasources.csv.TextInputCSVDataSource$.createBaseDataset(CSVDataSource.scala:158) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.execution.datasources.csv.TextInputCSVDataSource$.infer(CSVDataSource.scala:111) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.execution.datasources.csv.CSVDataSource.inferSchema(CSVDataSource.scala:65) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat.inferSchema(CSVFileFormat.scala:63) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.execution.datasources.DataSource.$anonfun$getOrInferFileFormatSchema$11(DataSource.scala:210) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
at scala.Option.orElse(Option.scala:447) ~[scala-library-2.12.15.jar:na]
at org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:207) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:411) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:274) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:245) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
at scala.Option.getOrElse(Option.scala:189) ~[scala-library-2.12.15.jar:na]
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:245) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:571) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:481) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
at com.example.wuzzufdataanalysis.bootStrapData.BootStrapData.run(BootStrapData.java:45) ~[classes/:na]
at org.springframework.boot.SpringApplication.callRunner(SpringApplication.java:777) [spring-boot-2.6.4.jar:2.6.4]
... 5 common frames omitted
pom.xml:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the WuzzufDataAnalysis Spring Boot application. -->
<modelVersion>4.0.0</modelVersion>
<!-- Inherits from the Spring Boot 2.6.4 starter parent; dependencies below that omit a version rely on it. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.6.4</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<!-- Coordinates of this project. -->
<groupId>com.example</groupId>
<artifactId>WuzzufDataAnalysis</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>WuzzufDataAnalysis</name>
<description>WuzzufDataAnalysis</description>
<properties>
<!-- Target Java version (Java 8). -->
<java.version>1.8</java.version>
</properties>
<!-- NOTE(review): no Spark artifacts (spark-core / spark-sql) are declared in this list, although the
     stack trace shows spark-sql_2.12-3.2.1 and spark-catalyst_2.12-3.2.1 on the classpath; presumably
     the posted pom is incomplete. Confirm how Spark and its transitive dependencies are provided. -->
<dependencies>
<!-- Spring Data JPA starter (backs the repository interfaces). -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
<!-- Spring MVC web starter. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- H2 database, needed at runtime only. -->
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<scope>runtime</scope>
</dependency>
<!-- Spring Boot test starter, test scope only. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- Thymeleaf view starter. The explicit version repeats the parent's 2.6.4. logback-classic is
     excluded from this starter only; NOTE(review): the other starters declare no such exclusion,
     so verify whether logback still reaches the classpath through them. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-thymeleaf</artifactId>
<version>2.6.4</version>
<exclusions>
<exclusion>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Spring Boot Maven plugin. -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
code: BootStrapData Class:
package com.example.wuzzufdataanalysis.bootStrapData;
import com.example.wuzzufdataanalysis.model.RowEntity;
import com.example.wuzzufdataanalysis.model.SummaryEntity;
import com.example.wuzzufdataanalysis.model.cleanDataEntity;
import com.example.wuzzufdataanalysis.repositories.*;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;
/**
 * Start-up task that reads the Wuzzuf jobs CSV with Spark and copies its
 * contents into the application's repositories.
 *
 * <p>Three things are persisted, in this order: every row of the CSV, the
 * Spark summary of the raw data, and the Spark summary of the data after
 * duplicate rows have been dropped.
 */
@Component
public class BootStrapData implements CommandLineRunner {

    /** Relative path of the CSV file that is loaded on start-up. */
    private static final String JOBS_CSV_PATH = "src/main/resources/Wuzzuf_Jobs.csv";

    private final RowEntityRepository rowEntityRepository;
    private final SummaryEntityRepository summaryEntityRepository;
    private final CompanyJobsEntityRepository aggEntityRepository;
    private final cleanDataEntityRepository cleanDataEntityRepository;
    private final CompanyJobsEntityRepository companyJobsEntityRepository;
    private final JobTitleEntityRepository jobTitleEntityRepository;
    private final SkillsEntityRepository skillsEntityRepository;

    /**
     * Stores the repositories handed in by the container.
     *
     * @param rowEntityRepository         target for the raw CSV rows
     * @param summaryEntityRepository     target for the summary of the raw data
     * @param aggEntityRepository         company/jobs repository (not used by {@link #run})
     * @param cleanDataEntityRepositor    target for the summary of the de-duplicated data
     * @param companyJobsEntityRepository company/jobs repository (not used by {@link #run})
     * @param jobTitleEntityRepository    job-title repository (not used by {@link #run})
     * @param skillsEntityRepository      skills repository (not used by {@link #run})
     */
    public BootStrapData(RowEntityRepository rowEntityRepository, SummaryEntityRepository summaryEntityRepository, CompanyJobsEntityRepository aggEntityRepository, cleanDataEntityRepository cleanDataEntityRepositor, CompanyJobsEntityRepository companyJobsEntityRepository, JobTitleEntityRepository jobTitleEntityRepository, SkillsEntityRepository skillsEntityRepository) {
        this.rowEntityRepository = rowEntityRepository;
        this.summaryEntityRepository = summaryEntityRepository;
        this.aggEntityRepository = aggEntityRepository;
        this.cleanDataEntityRepository = cleanDataEntityRepositor;
        this.companyJobsEntityRepository = companyJobsEntityRepository;
        this.jobTitleEntityRepository = jobTitleEntityRepository;
        this.skillsEntityRepository = skillsEntityRepository;
    }

    /**
     * Loads the CSV through a local Spark session and persists its rows and summaries.
     *
     * @param args command-line arguments (ignored)
     * @throws Exception if reading the CSV or saving an entity fails
     */
    @Override
    public void run(String... args) throws Exception {
        // SparkSession is Closeable: close it once loading is finished so the
        // local Spark context does not stay alive after bootstrapping.
        try (SparkSession sparkSession = SparkSession
                .builder()
                .master("local[2]")
                .appName("Integrating Spring-boot with Apache Spark")
                .getOrCreate()) {
            Dataset<Row> dataSet = sparkSession.read().option("header", true).csv(JOBS_CSV_PATH);
            saveRows(dataSet);
            saveSummary(dataSet.summary());
            saveCleanSummary(dataSet.dropDuplicates().summary());
        }
    }

    /** Saves one {@code RowEntity} per row, built from the first eight string columns. */
    private void saveRows(Dataset<Row> rows) {
        rows.toLocalIterator().forEachRemaining(row -> {
            RowEntity entity = new RowEntity(
                    row.getString(0),
                    row.getString(1),
                    row.getString(2),
                    row.getString(3),
                    row.getString(4),
                    row.getString(5),
                    row.getString(6),
                    row.getString(7)
            );
            rowEntityRepository.save(entity);
        });
    }

    /** Saves one {@code SummaryEntity} per summary row, built from its first nine string columns. */
    private void saveSummary(Dataset<Row> summaryRows) {
        summaryRows.toLocalIterator().forEachRemaining(row -> {
            SummaryEntity entity = new SummaryEntity(
                    row.getString(0),
                    row.getString(1),
                    row.getString(2),
                    row.getString(3),
                    row.getString(4),
                    row.getString(5),
                    row.getString(6),
                    row.getString(7),
                    row.getString(8)
            );
            summaryEntityRepository.save(entity);
        });
    }

    /** Saves one {@code cleanDataEntity} per row of the de-duplicated summary (nine string columns). */
    private void saveCleanSummary(Dataset<Row> cleanSummaryRows) {
        cleanSummaryRows.toLocalIterator().forEachRemaining(row -> {
            cleanDataEntity entity = new cleanDataEntity(
                    row.getString(0),
                    row.getString(1),
                    row.getString(2),
                    row.getString(3),
                    row.getString(4),
                    row.getString(5),
                    row.getString(6),
                    row.getString(7),
                    row.getString(8)
            );
            cleanDataEntityRepository.save(entity);
        });
    }
}
RowEntityController class:
package com.example.wuzzufdataanalysis.controllers;
import com.example.wuzzufdataanalysis.repositories.RowEntityRepository;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.RequestMapping;
/**
 * MVC controller that publishes the stored rows to the "display-data" view.
 */
@Controller
public class RowEntityController {

    private final RowEntityRepository rowEntityRepository;

    /**
     * @param rowEntityRepository repository the rows are read from
     */
    public RowEntityController(RowEntityRepository rowEntityRepository) {
        this.rowEntityRepository = rowEntityRepository;
    }

    /**
     * Handles requests to {@code /display-data}: puts every stored row into
     * the model under the key {@code rows} and selects the index view.
     *
     * @param model model that receives the {@code rows} attribute
     * @return the view name {@code display-data/index}
     */
    @RequestMapping("/display-data")
    public String getData(Model model) {
        final Iterable<?> storedRows = rowEntityRepository.findAll();
        model.addAttribute("rows", storedRows);

        final String viewName = "display-data/index";
        return viewName;
    }
}
RowEntityRepository interface:
package com.example.wuzzufdataanalysis.repositories;
import com.example.wuzzufdataanalysis.model.RowEntity;
import org.springframework.data.repository.CrudRepository;
/**
 * Repository for {@link RowEntity} records, keyed by {@code Long}.
 *
 * <p>Declares no methods of its own; all operations come from
 * {@link CrudRepository}.
 */
public interface RowEntityRepository extends CrudRepository<RowEntity, Long> {
}
I know that this is a bit of a mess, but it's for educational purposes only, so I'm completely avoiding dependency injection.
Any help would be appreciated.