
I want to run this Java source code for K-Means clustering (MLlib) on Spark 1.3.1:

import java.util.regex.Pattern;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.clustering.KMeans;
import org.apache.spark.mllib.clustering.KMeansModel;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;

/**
 * Example using MLlib KMeans from Java.
 */
public final class JavaKMeans {

    /** Parses one line of space-separated doubles into a dense vector. */
    private static class ParsePoint implements Function<String, Vector> {
        private static final Pattern SPACE = Pattern.compile(" ");

        @Override
        public Vector call(String line) {
            String[] tok = SPACE.split(line);
            double[] point = new double[tok.length];
            for (int i = 0; i < tok.length; ++i) {
                point[i] = Double.parseDouble(tok[i]);
            }
            return Vectors.dense(point);
        }
    }

    public static void main(String[] args) {
        if (args.length < 3) {
            System.err.println(
                    "Usage: JavaKMeans <input_file> <k> <max_iterations> [<runs>]");
            System.exit(1);
        }
        String inputFile = args[0];
        int k = Integer.parseInt(args[1]);
        int iterations = Integer.parseInt(args[2]);
        int runs = 1;
        if (args.length >= 4) {
            runs = Integer.parseInt(args[3]);
        }

        SparkConf sparkConf = new SparkConf().setAppName("JavaKMeans");
        JavaSparkContext sc = new JavaSparkContext(sparkConf);

        // Load the input file and parse each line into a vector of doubles.
        JavaRDD<String> lines = sc.textFile(inputFile);
        JavaRDD<Vector> points = lines.map(new ParsePoint());

        // Train the model using the k-means|| parallel initialization mode.
        KMeansModel model = KMeans.train(points.rdd(), k, iterations, runs,
                KMeans.K_MEANS_PARALLEL());

        System.out.println("Cluster centers:");
        for (Vector center : model.clusterCenters()) {
            System.out.println(" " + center);
        }
        double cost = model.computeCost(points.rdd());
        System.out.println("Cost: " + cost);

        sc.stop();
    }
}
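
For reference, ParsePoint expects the input file to contain one point per line, with coordinates separated by single spaces. My file.txt follows that layout, e.g. (made-up sample points):

0.0 0.0 0.0
0.1 0.1 0.1
9.0 9.0 9.0
9.1 9.1 9.1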

How can I run this source code on Spark? I first tried running it directly with this command: JavaKMeans file.txt 4 100 [1]

But this gave me an error: JavaKMeans: command not found

I also tried submitting it to Spark with: ./bin/spark-submit --class JavaKMeans --master spark://zaki-Inspiron-3521:7077 JavaKMeans-1.0-SNAPSHOT.jar file.txt output

But it also gave me an error.
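
Based on the usage string in main, I assume the spark-submit invocation should pass <input_file> <k> <max_iterations> as the application arguments rather than file.txt output, so I'd expect something like this (jar name taken from my Maven build, master URL from my cluster):

./bin/spark-submit --class JavaKMeans \
  --master spark://zaki-Inspiron-3521:7077 \
  JavaKMeans-1.0-SNAPSHOT.jar file.txt 4 100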
