1

I'm trying to create a web service with Spring Boot to do some data analysis with Spark, but Spark is a pain to set up and manage with all the configuration and dependency management.

I'm facing this error when I run the application:

Stack trace:

java.lang.IllegalStateException: Failed to execute CommandLineRunner
    at org.springframework.boot.SpringApplication.callRunner(SpringApplication.java:780) [spring-boot-2.6.4.jar:2.6.4]
    at org.springframework.boot.SpringApplication.callRunners(SpringApplication.java:761) [spring-boot-2.6.4.jar:2.6.4]
    at org.springframework.boot.SpringApplication.run(SpringApplication.java:310) [spring-boot-2.6.4.jar:2.6.4]
    at org.springframework.boot.SpringApplication.run(SpringApplication.java:1312) [spring-boot-2.6.4.jar:2.6.4]
    at org.springframework.boot.SpringApplication.run(SpringApplication.java:1301) [spring-boot-2.6.4.jar:2.6.4]
    at com.example.wuzzufdataanalysis.WuzzufDataAnalysisApplication.main(WuzzufDataAnalysisApplication.java:10) [classes/:na]
Caused by: java.lang.RuntimeException: error reading Scala signature of org.apache.spark.sql.package: unsafe symbol Unstable (child of package annotation) in runtime reflection universe
    at scala.reflect.internal.pickling.UnPickler.unpickle(UnPickler.scala:51) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.JavaMirrors$JavaMirror.unpickleClass(JavaMirrors.scala:660) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SymbolLoaders$TopClassCompleter.$anonfun$complete$2(SymbolLoaders.scala:37) ~[scala-reflect-2.12.15.jar:na]
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) ~[scala-library-2.12.15.jar:na]
    at scala.reflect.internal.SymbolTable.slowButSafeEnteringPhaseNotLaterThan(SymbolTable.scala:333) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SymbolLoaders$TopClassCompleter.complete(SymbolLoaders.scala:34) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Symbols$Symbol.completeInfo(Symbols.scala:1551) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Symbols$Symbol.info(Symbols.scala:1514) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol$$anon$13.scala$reflect$runtime$SynchronizedSymbols$SynchronizedSymbol$$super$info(SynchronizedSymbols.scala:221) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.$anonfun$info$1(SynchronizedSymbols.scala:158) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.info(SynchronizedSymbols.scala:149) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.info$(SynchronizedSymbols.scala:158) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol$$anon$13.info(SynchronizedSymbols.scala:221) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.SymbolTable.openPackageModule(SymbolTable.scala:356) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.SymbolTable.openPackageModule(SymbolTable.scala:411) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SymbolLoaders$LazyPackageType.$anonfun$complete$3(SymbolLoaders.scala:83) ~[scala-reflect-2.12.15.jar:na]
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) ~[scala-library-2.12.15.jar:na]
    at scala.reflect.internal.SymbolTable.slowButSafeEnteringPhaseNotLaterThan(SymbolTable.scala:333) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SymbolLoaders$LazyPackageType.complete(SymbolLoaders.scala:80) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Symbols$Symbol.completeInfo(Symbols.scala:1551) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Symbols$Symbol.info(Symbols.scala:1514) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol$$anon$9.scala$reflect$runtime$SynchronizedSymbols$SynchronizedSymbol$$super$info(SynchronizedSymbols.scala:209) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.$anonfun$info$1(SynchronizedSymbols.scala:158) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.info(SynchronizedSymbols.scala:149) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol.info$(SynchronizedSymbols.scala:158) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.runtime.SynchronizedSymbols$SynchronizedSymbol$$anon$9.info(SynchronizedSymbols.scala:209) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Types$TypeRef.baseClasses(Types.scala:2280) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.tpe.FindMembers$FindMemberBase.init(FindMembers.scala:36) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.tpe.FindMembers$FindMember.init(FindMembers.scala:257) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Types$Type.$anonfun$findMember$1(Types.scala:1042) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Types$Type.findMemberInternal$1(Types.scala:1041) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Types$Type.findMember(Types.scala:1046) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Types$Type.memberBasedOnName(Types.scala:672) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Types$Type.member(Types.scala:636) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Mirrors$RootsBase.getModuleOrClass(Mirrors.scala:55) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Mirrors$RootsBase.staticClass(Mirrors.scala:51) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.internal.Mirrors$RootsBase.staticClass(Mirrors.scala:29) ~[scala-reflect-2.12.15.jar:na]
    at org.apache.spark.sql.catalyst.ScalaReflection$$typecreator3$1.apply(ScalaReflection.scala:101) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at scala.reflect.api.TypeTags$WeakTypeTagImpl.tpe$lzycompute(TypeTags.scala:237) ~[scala-reflect-2.12.15.jar:na]
    at scala.reflect.api.TypeTags$WeakTypeTagImpl.tpe(TypeTags.scala:237) ~[scala-reflect-2.12.15.jar:na]
    at org.apache.spark.sql.catalyst.ScalaReflection.localTypeOf(ScalaReflection.scala:962) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.catalyst.ScalaReflection.localTypeOf$(ScalaReflection.scala:960) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.catalyst.ScalaReflection$.localTypeOf(ScalaReflection.scala:51) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.catalyst.ScalaReflection$.$anonfun$dataTypeFor$1(ScalaReflection.scala:101) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at scala.reflect.internal.tpe.TypeConstraints$UndoLog.undo(TypeConstraints.scala:73) ~[scala-reflect-2.12.15.jar:na]
    at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects(ScalaReflection.scala:947) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects$(ScalaReflection.scala:946) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.catalyst.ScalaReflection$.cleanUpReflectionObjects(ScalaReflection.scala:51) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.catalyst.ScalaReflection$.dataTypeFor(ScalaReflection.scala:90) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.catalyst.ScalaReflection$.$anonfun$serializerForType$1(ScalaReflection.scala:435) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at scala.reflect.internal.tpe.TypeConstraints$UndoLog.undo(TypeConstraints.scala:73) ~[scala-reflect-2.12.15.jar:na]
    at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects(ScalaReflection.scala:947) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects$(ScalaReflection.scala:946) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.catalyst.ScalaReflection$.cleanUpReflectionObjects(ScalaReflection.scala:51) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.catalyst.ScalaReflection$.serializerForType(ScalaReflection.scala:429) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$.apply(ExpressionEncoder.scala:55) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.Encoders$.STRING(Encoders.scala:93) ~[spark-catalyst_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.execution.datasources.csv.TextInputCSVDataSource$.createBaseDataset(CSVDataSource.scala:158) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.execution.datasources.csv.TextInputCSVDataSource$.infer(CSVDataSource.scala:111) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.execution.datasources.csv.CSVDataSource.inferSchema(CSVDataSource.scala:65) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat.inferSchema(CSVFileFormat.scala:63) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.execution.datasources.DataSource.$anonfun$getOrInferFileFormatSchema$11(DataSource.scala:210) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
    at scala.Option.orElse(Option.scala:447) ~[scala-library-2.12.15.jar:na]
    at org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:207) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:411) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:274) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:245) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
    at scala.Option.getOrElse(Option.scala:189) ~[scala-library-2.12.15.jar:na]
    at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:245) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:571) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
    at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:481) ~[spark-sql_2.12-3.2.1.jar:3.2.1]
    at com.example.wuzzufdataanalysis.bootStrapData.BootStrapData.run(BootStrapData.java:45) ~[classes/:na]
    at org.springframework.boot.SpringApplication.callRunner(SpringApplication.java:777) [spring-boot-2.6.4.jar:2.6.4]
    ... 5 common frames omitted

pom.xml:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the WuzzufDataAnalysis Spring Boot application. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- Inherits plugin and dependency version management from the Spring Boot 2.6.4 parent. -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.6.4</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.example</groupId>
    <artifactId>WuzzufDataAnalysis</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>WuzzufDataAnalysis</name>
    <description>WuzzufDataAnalysis</description>
    <properties>
        <!-- Source/target level of the project: Java 8. -->
        <java.version>1.8</java.version>
    </properties>
    <!--
        NOTE(review): no org.apache.spark artifacts are declared in this file, although
        BootStrapData imports org.apache.spark.sql.* and the stack trace shows
        spark-sql_2.12-3.2.1 and spark-catalyst_2.12-3.2.1 on the classpath. Presumably the
        Spark dependencies were trimmed from this listing; verify against the real pom.
    -->
    <dependencies>
        <!-- Starters below carry no explicit version; they take it from the parent. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-jpa</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <!-- H2 database driver, needed at runtime only. -->
        <dependency>
            <groupId>com.h2database</groupId>
            <artifactId>h2</artifactId>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!--
            Thymeleaf starter. The explicit 2.6.4 version repeats the parent's version, and
            logback-classic is excluded from this starter's transitive dependencies only.
            NOTE(review): the other starters above do not exclude logback-classic, so it may
            still arrive through them; confirm the exclusion has the intended effect.
        -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
            <version>2.6.4</version>
            <exclusions>
                <exclusion>
                    <groupId>ch.qos.logback</groupId>
                    <artifactId>logback-classic</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <!-- Spring Boot Maven plugin; version comes from the parent. -->
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>

</project>

code: BootStrapData Class:

package com.example.wuzzufdataanalysis.bootStrapData;

import com.example.wuzzufdataanalysis.model.RowEntity;
import com.example.wuzzufdataanalysis.model.SummaryEntity;
import com.example.wuzzufdataanalysis.model.cleanDataEntity;
import com.example.wuzzufdataanalysis.repositories.*;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;


/**
 * Startup task that reads the Wuzzuf jobs CSV with Spark and persists its rows,
 * together with two summary tables, through the injected repositories.
 *
 * <p>It runs at application start because it implements {@link CommandLineRunner}.
 */
@Component
public class BootStrapData implements CommandLineRunner {

    /**
     * Relative path of the input CSV.
     * NOTE(review): the path points into the source tree, so it presumably will not
     * resolve when the application is started from a packaged jar; confirm.
     */
    private static final String CSV_PATH = "src/main/resources/Wuzzuf_Jobs.csv";

    private final RowEntityRepository rowEntityRepository;
    private final SummaryEntityRepository summaryEntityRepository;
    // Previously named "CompanyJobsEntityRepository", which shadowed the simple name of its
    // own type inside this class. Injected but not used by run().
    private final CompanyJobsEntityRepository aggEntityRepository;
    private final cleanDataEntityRepository cleanDataEntityRepository;
    // The remaining repositories are injected but not used by run().
    private final CompanyJobsEntityRepository companyJobsEntityRepository;
    private final JobTitleEntityRepository jobTitleEntityRepository;
    private final SkillsEntityRepository skillsEntityRepository;


    /**
     * Stores the repositories supplied by the container.
     *
     * @param rowEntityRepository         target for the raw CSV rows
     * @param summaryEntityRepository     target for the summary of the raw data
     * @param aggEntityRepository         company/jobs repository (currently unused)
     * @param cleanDataEntityRepositor    target for the summary of the de-duplicated data
     * @param companyJobsEntityRepository company/jobs repository (currently unused)
     * @param jobTitleEntityRepository    job-title repository (currently unused)
     * @param skillsEntityRepository      skills repository (currently unused)
     */
    public BootStrapData(RowEntityRepository rowEntityRepository, SummaryEntityRepository summaryEntityRepository, CompanyJobsEntityRepository aggEntityRepository, cleanDataEntityRepository cleanDataEntityRepositor, com.example.wuzzufdataanalysis.repositories.CompanyJobsEntityRepository companyJobsEntityRepository, JobTitleEntityRepository jobTitleEntityRepository, SkillsEntityRepository skillsEntityRepository) {
        this.rowEntityRepository = rowEntityRepository;
        this.summaryEntityRepository = summaryEntityRepository;
        this.aggEntityRepository = aggEntityRepository;
        this.cleanDataEntityRepository = cleanDataEntityRepositor;
        this.companyJobsEntityRepository = companyJobsEntityRepository;
        this.jobTitleEntityRepository = jobTitleEntityRepository;
        this.skillsEntityRepository = skillsEntityRepository;
    }


    /**
     * Loads the CSV and saves, in order: every row, the summary of the data set, and the
     * summary of the data set after dropping duplicate rows.
     *
     * @param args command-line arguments passed by Spring Boot (ignored)
     * @throws Exception if Spark fails to read the file or a repository save fails
     */
    @Override
    public void run(String... args) throws Exception {

        // try-with-resources stops the Spark session once the import is done, including
        // when reading or saving fails; the original never stopped it.
        try (SparkSession sparkSession = SparkSession
                .builder()
                .master("local[2]")
                .appName("Integrating Spring-boot with Apache Spark")
                .getOrCreate()) {

            Dataset<Row> dataSet = sparkSession.read().option("header", true).csv(CSV_PATH);
            dataSet.toLocalIterator()
                    .forEachRemaining(s -> rowEntityRepository.save(toRowEntity(s)));

            Dataset<Row> dataSummary = dataSet.summary();
            dataSummary.toLocalIterator()
                    .forEachRemaining(s -> summaryEntityRepository.save(toSummaryEntity(s)));

            Dataset<Row> cleanData = dataSet.dropDuplicates().summary();
            cleanData.toLocalIterator()
                    .forEachRemaining(s -> cleanDataEntityRepository.save(toCleanDataEntity(s)));
        }
    }

    /** Maps the first eight string columns of a CSV row to a {@code RowEntity}. */
    private static RowEntity toRowEntity(Row s) {
        return new RowEntity(
                s.getString(0),
                s.getString(1),
                s.getString(2),
                s.getString(3),
                s.getString(4),
                s.getString(5),
                s.getString(6),
                s.getString(7)
        );
    }

    /** Maps the nine string columns (index 0-8) of a summary row to a {@code SummaryEntity}. */
    private static SummaryEntity toSummaryEntity(Row s) {
        return new SummaryEntity(
                s.getString(0),
                s.getString(1),
                s.getString(2),
                s.getString(3),
                s.getString(4),
                s.getString(5),
                s.getString(6),
                s.getString(7),
                s.getString(8)
        );
    }

    /** Maps the nine string columns (index 0-8) of a summary row to a {@code cleanDataEntity}. */
    private static cleanDataEntity toCleanDataEntity(Row s) {
        return new cleanDataEntity(
                s.getString(0),
                s.getString(1),
                s.getString(2),
                s.getString(3),
                s.getString(4),
                s.getString(5),
                s.getString(6),
                s.getString(7),
                s.getString(8)
        );
    }
}

RowEntityController class:

package com.example.wuzzufdataanalysis.controllers;


import com.example.wuzzufdataanalysis.repositories.RowEntityRepository;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.RequestMapping;

/**
 * MVC controller that exposes every stored row to the "display-data" view.
 */
@Controller
public class RowEntityController {

    /** Model attribute under which the rows are published. */
    private static final String ROWS_ATTRIBUTE = "rows";

    /** Logical name of the view returned by {@link #getData(Model)}. */
    private static final String DISPLAY_VIEW = "display-data/index";

    private final RowEntityRepository repository;

    /**
     * @param rowEntityRepository repository the rows are read from
     */
    public RowEntityController(RowEntityRepository rowEntityRepository) {
        repository = rowEntityRepository;
    }

    /**
     * Handles requests to {@code /display-data}: puts the result of
     * {@code findAll()} into the model and selects the view.
     *
     * @param model model populated for the view
     * @return the view name {@code display-data/index}
     */
    @RequestMapping("/display-data")
    public String getData(Model model) {
        final Object allRows = repository.findAll();
        model.addAttribute(ROWS_ATTRIBUTE, allRows);
        return DISPLAY_VIEW;
    }
}

RowEntityRepository interface:

package com.example.wuzzufdataanalysis.repositories;

import com.example.wuzzufdataanalysis.model.RowEntity;
import org.springframework.data.repository.CrudRepository;

/**
 * Repository for {@link RowEntity} instances identified by a {@code Long} key.
 *
 * <p>Declares no methods of its own; all operations are inherited from
 * {@link CrudRepository}.
 */
public interface RowEntityRepository extends CrudRepository<RowEntity, Long> {
}

I know that this is a bit of a mess, but it's for educational purposes only, so I'm completely avoiding dependency injection.

Any help would be appreciated.

  • `Dataset<Row> dataSet = sparkSession.read().option("header", true).csv("src/main/resources/Wuzzuf_Jobs.csv");` Are you able to read this csv file? – Abrar Ansari Mar 07 '22 at 14:29
  • @AbrarAnsari That's actually the line generating the error according to the stack trace at com.example.wuzzufdataanalysis.bootStrapData.BootStrapData.run(BootStrapData.java:45) ~[classes/:na] – Ramez Essam Mar 07 '22 at 14:38
  • check this out `https://stackoverflow.com/questions/56444263/what-does-this-mean-assertionerror-assertion-failed-unsafe-symbol-unstable` – Abrar Ansari Mar 07 '22 at 14:41
  • @AbrarAnsari Yes it was indeed some class not found exception nested in there. thanks – Ramez Essam Mar 08 '22 at 08:07

0 Answers