I'm using the following repo to run Spark (2.4.7) and Livy (0.7).

The curl commands shown in the repo work fine, and it seems that everything is up and running.

I wrote a simple word-count Spark program in Java (built with Maven) and used the Livy client to submit it as a Spark job through Livy.

My Java word count:

package spark;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.*;
import org.apache.livy.Job;
import org.apache.livy.JobContext;
import scala.Tuple2;

public final class JavaWordCount implements Job<Double> {

    private static final long serialVersionUID = 4870271814150948504L;
    private static final Pattern SPACE = Pattern.compile(" ");

    @Override
    public Double call(JobContext ctx) throws Exception {
        count(ctx);
        return 0.7;
    }

    public void count(JobContext ctx) {

        // Parallelize a hard-coded sentence rather than reading an input file.
        //JavaRDD<String> lines = ctx.textFile(args[0], 1);
        JavaRDD<String> lines = ctx.sc().parallelize(Arrays.asList("It is close to midnight and something evil is lurking in the dark".split(" ")));

        JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterator<String> call(String s) {
                return Arrays.asList(SPACE.split(s)).iterator();
            }
        });

        JavaPairRDD<String, Integer> ones = words.mapToPair(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String s) {
                return new Tuple2<>(s, 1);
            }
        });

        JavaPairRDD<String, Integer> counts = ones.reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer i1, Integer i2) {
                return i1 + i2;
            }
        });

        List<Tuple2<String, Integer>> output = counts.collect();
        for (Tuple2<?, ?> tuple : output) {
            System.out.println(tuple._1() + ": " + tuple._2());
        }
    }
}
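
As an aside, Spark 2.x's Java API also accepts Java 8 lambdas, so the transformation chain in count can be written more compactly. An equivalent sketch of just the pipeline (same behavior, lambda syntax):

// Equivalent to the count(ctx) body above, using lambdas instead of anonymous classes.
JavaRDD<String> lines = ctx.sc().parallelize(
        Arrays.asList("It is close to midnight and something evil is lurking in the dark".split(" ")));
JavaPairRDD<String, Integer> counts = lines
        .flatMap(s -> Arrays.asList(SPACE.split(s)).iterator()) // split each element into words
        .mapToPair(s -> new Tuple2<>(s, 1))                     // pair each word with a count of 1
        .reduceByKey((i1, i2) -> i1 + i2);                      // sum the counts per word
counts.collect().forEach(t -> System.out.println(t._1() + ": " + t._2()));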

My Livy client:

package spark;

import org.apache.livy.Job;
import org.apache.livy.LivyClient;
import org.apache.livy.LivyClientBuilder;

import java.io.File;
import java.net.URI;

public final class SubmitJob {

    private static final String livyURI = "http://localhost:8998/";
    private static final String JAR_PATH = "/Users/.../spark-word-count/target/word-count-0.1-SNAPSHOT.jar";

    public static void main(String[] args) throws Exception {
        LivyClient livyClient = new LivyClientBuilder()
        .setURI(new URI(livyURI)).build();
        
        try {
            System.err.printf("Uploading %s to the Spark context...\n", JAR_PATH);
            livyClient.uploadJar(new File(JAR_PATH));
            
            System.err.printf("Running JavaWordCount...\n", JAR_PATH);
            double pi = livyClient.submit(new JavaWordCount()).get();
        
            System.out.println("Pi is roughly: " + pi);
        } finally {
            livyClient.stop(true);
        }

    }
}

When I run my client I get the following error:

Exception in thread "main" java.util.concurrent.ExecutionException: java.lang.RuntimeException: org.apache.livy.shaded.kryo.kryo.KryoException: Unable to find class: spark.JavaWordCount
    at org.apache.livy.shaded.kryo.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:138)
    at org.apache.livy.shaded.kryo.kryo.util.DefaultClassResolver.readClass(DefaultClassResolver.java:115)
    at org.apache.livy.shaded.kryo.kryo.Kryo.readClass(Kryo.java:656)
    at org.apache.livy.shaded.kryo.kryo.Kryo.readClassAndObject(Kryo.java:767)
    at org.apache.livy.client.common.Serializer.deserialize(Serializer.java:63)
    at org.apache.livy.rsc.driver.BypassJob.call(BypassJob.java:39)
    at org.apache.livy.rsc.driver.BypassJob.call(BypassJob.java:27)
    at org.apache.livy.rsc.driver.JobWrapper.call(JobWrapper.java:64)
    at org.apache.livy.rsc.driver.BypassJobWrapper.call(BypassJobWrapper.java:45)
    at org.apache.livy.rsc.driver.BypassJobWrapper.call(BypassJobWrapper.java:27)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException: spark.JavaWordCount

How can I solve this error?
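
One sanity check, since the failure is ultimately a ClassNotFoundException: confirm that spark/JavaWordCount.class is actually packaged in the jar being uploaded. A small hypothetical helper for that (the CheckJar class is mine, just for illustration):

package spark;

import java.util.jar.JarFile;

public final class CheckJar {
    public static void main(String[] args) throws Exception {
        // Pass the jar path as the first argument, e.g. target/word-count-0.1-SNAPSHOT.jar
        try (JarFile jar = new JarFile(args[0])) {
            boolean found = jar.getEntry("spark/JavaWordCount.class") != null;
            System.out.println("spark/JavaWordCount.class present: " + found);
        }
    }
}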

I read that this might be caused by livy.file.local-dir-whitelist. My Livy conf whitelist looks like this:

livy.file.local-dir-whitelist = /

I have also tried uploading the jar to the Livy container, putting it under "/", and changing JAR_PATH to "/word-count-0.1-SNAPSHOT.jar" in my client. I am getting the same error...
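
For completeness, uploadJar is not the only way to make the jar visible: LivyClient also exposes addJar(URI), which points the Livy server at a file it can already see locally, and as far as I understand it is this path that livy.file.local-dir-whitelist governs (treat that as an assumption on my part). A minimal sketch of that variant:

package spark;

import java.net.URI;
import org.apache.livy.LivyClient;
import org.apache.livy.LivyClientBuilder;

public final class SubmitJobViaAddJar {
    public static void main(String[] args) throws Exception {
        LivyClient client = new LivyClientBuilder()
                .setURI(new URI("http://localhost:8998/")).build();
        try {
            // The file: URI must point at a path on the Livy server that falls
            // under livy.file.local-dir-whitelist.
            client.addJar(new URI("file:/word-count-0.1-SNAPSHOT.jar")).get();
            double result = client.submit(new JavaWordCount()).get();
            System.out.println("Job result: " + result);
        } finally {
            client.stop(true);
        }
    }
}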

How can I submit my Jar?

Oded
