1

I am new to Apache Spark GraphX and I am trying to create a graph using Java. I have a road network EDGE dataset which consists of Edge_id (INT), Source_ID(INT), Destination_ID (INT), and Edge_Length(Double). I created a class name called EdgeNetwork, which I am using as a JavaRDD type. After creating the RDD of the class, I created a list to store only 3 columns except for the Edge_Id. I then created an RDD of the Edge List. However, when I try to create a graph, I get the error and cannot proceed further.

Is the process correct or not?

package graphXApi;

1. EdgeNetwork.java

import java.io.Serializable;
public class EdgeNetwork implements Serializable {

    private Integer edge_id;
    private Integer src_id;
    private Integer dest_id;
    private Double edge_Length;

    public Integer getEdge_id() {
        return edge_id;
    }

    public void setEdge_id(Integer edge_id) {
        this.edge_id = edge_id;
    }

    public Integer getSrc_id() {
        return src_id;
    }

    public void setSrc_id(Integer src_id) {
        this.src_id = src_id;
    }

    public Integer getDest_id() {
        return dest_id;
    }

    public void setDest_id(Integer dest_id) {
        this.dest_id = dest_id;
    }

    public Double getEdge_Length() {
        return edge_Length;
    }

    public void setEdge_Length(Double edge_Length) {
        this.edge_Length = edge_Length;
    }

}

2. GraphDatasetParsingFile

public class GraphDatasetParsingFile {
    public static <T> void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("GraphFileReadClass");
        JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
        ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);

        
        JavaRDD<String> textFile = javaSparkContext.textFile("C:\\Users\\Aavash\\Desktop\\SFEdge.txt");

        
        JavaRDD<EdgeNetwork> edgeFileRDD = textFile.map(line -> {
            String[] parts = line.split(" ");
            EdgeNetwork edgeNet = new EdgeNetwork();
            edgeNet.setEdge_id(Integer.parseInt(parts[0]));
            edgeNet.setSrc_id(Integer.parseInt(parts[1]));
            edgeNet.setDest_id(Integer.parseInt(parts[2]));
            edgeNet.setEdge_Length(Double.parseDouble(parts[3]));
            return edgeNet;
        });

        
        edgeFileRDD.foreach(edgeInfo -> System.err
                .println("Edge_ID:" + edgeInfo.getEdge_id() + " " + "Source: " + edgeInfo.getSrc_id() + " "
                        + "Destination: " + edgeInfo.getDest_id() + " " + "EdgeLength: " + edgeInfo.getEdge_Length()));

        
        JavaRDD<EdgeNetwork> edgesPart = textFile.mapPartitions(p -> {
            ArrayList<EdgeNetwork> edgeList = new ArrayList<EdgeNetwork>();
            while (p.hasNext()) {
                String[] parts = p.next().split(" ");
                EdgeNetwork edgeNet = new EdgeNetwork();
                edgeNet.setEdge_id(Integer.parseInt(parts[0]));
                edgeNet.setSrc_id(Integer.parseInt(parts[1]));
                edgeNet.setDest_id(Integer.parseInt(parts[2]));
                edgeNet.setEdge_Length(Double.parseDouble(parts[3]));
                edgeList.add(edgeNet);

            }
            return edgeList.iterator();
        });
        edgesPart.foreach(edgeInfo -> System.out
                .println("Edge_ID:" + edgeInfo.getEdge_id() + " " + "Source: " + edgeInfo.getSrc_id() + " "
                        + "Destination: " + edgeInfo.getDest_id() + " " + "EdgeLength: " + edgeInfo.getEdge_Length()));

        
        List<Edge<Integer>> edges = new ArrayList<Edge<Integer>>();
        edgesPart.foreach(edge -> {
            Collection<? extends Edge<Integer>> collection = (Collection<? extends Edge<Integer>>) new Edge<Double>(
                    edge.getSrc_id(), edge.getDest_id(), edge.getEdge_Length());
            edges.addAll(collection);
        });
        JavaRDD<Edge<Integer>> edgesRDD1 = javaSparkContext.parallelize(edges);

        Graph<Integer, Integer> graph2 = Graph.fromEdgeTuples(edgesRDD1.rdd(), " ", StorageLevel.MEMORY_ONLY(),
                StorageLevel.MEMORY_ONLY(), stringTag, stringTag);

    }
}
James Z
  • 12,209
  • 10
  • 24
  • 44
  • Please, copy and paste your code in properly formatted blocks here. Reading code from an image is hard, copying and pasting it somewhere else for tests is even harder. – Hristo Iliev Sep 13 '21 at 08:08
  • @HristoIliev Thanks for your suggestion, I have replaced the image with the code. Could you please check? – Aavash Bhandari Sep 13 '21 at 08:20

0 Answers0