I am new to Apache Spark GraphX and I am trying to create a graph using Java. I have a road network EDGE dataset which consists of Edge_id (INT), Source_ID(INT), Destination_ID (INT), and Edge_Length(Double). I created a class name called EdgeNetwork, which I am using as a JavaRDD type. After creating the RDD of the class, I created a list to store only 3 columns except for the Edge_Id. I then created an RDD of the Edge List. However, when I try to create a graph, I get the error and cannot proceed further.
Is the process correct or not?
package graphXApi;
1. EdgeNetwork.java
import java.io.Serializable;
public class EdgeNetwork implements Serializable {
private Integer edge_id;
private Integer src_id;
private Integer dest_id;
private Double edge_Length;
public Integer getEdge_id() {
return edge_id;
}
public void setEdge_id(Integer edge_id) {
this.edge_id = edge_id;
}
public Integer getSrc_id() {
return src_id;
}
public void setSrc_id(Integer src_id) {
this.src_id = src_id;
}
public Integer getDest_id() {
return dest_id;
}
public void setDest_id(Integer dest_id) {
this.dest_id = dest_id;
}
public Double getEdge_Length() {
return edge_Length;
}
public void setEdge_Length(Double edge_Length) {
this.edge_Length = edge_Length;
}
}
2. GraphDatasetParsingFile
public class GraphDatasetParsingFile {
public static <T> void main(String[] args) {
SparkConf conf = new SparkConf().setMaster("local").setAppName("GraphFileReadClass");
JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
JavaRDD<String> textFile = javaSparkContext.textFile("C:\\Users\\Aavash\\Desktop\\SFEdge.txt");
JavaRDD<EdgeNetwork> edgeFileRDD = textFile.map(line -> {
String[] parts = line.split(" ");
EdgeNetwork edgeNet = new EdgeNetwork();
edgeNet.setEdge_id(Integer.parseInt(parts[0]));
edgeNet.setSrc_id(Integer.parseInt(parts[1]));
edgeNet.setDest_id(Integer.parseInt(parts[2]));
edgeNet.setEdge_Length(Double.parseDouble(parts[3]));
return edgeNet;
});
edgeFileRDD.foreach(edgeInfo -> System.err
.println("Edge_ID:" + edgeInfo.getEdge_id() + " " + "Source: " + edgeInfo.getSrc_id() + " "
+ "Destination: " + edgeInfo.getDest_id() + " " + "EdgeLength: " + edgeInfo.getEdge_Length()));
JavaRDD<EdgeNetwork> edgesPart = textFile.mapPartitions(p -> {
ArrayList<EdgeNetwork> edgeList = new ArrayList<EdgeNetwork>();
while (p.hasNext()) {
String[] parts = p.next().split(" ");
EdgeNetwork edgeNet = new EdgeNetwork();
edgeNet.setEdge_id(Integer.parseInt(parts[0]));
edgeNet.setSrc_id(Integer.parseInt(parts[1]));
edgeNet.setDest_id(Integer.parseInt(parts[2]));
edgeNet.setEdge_Length(Double.parseDouble(parts[3]));
edgeList.add(edgeNet);
}
return edgeList.iterator();
});
edgesPart.foreach(edgeInfo -> System.out
.println("Edge_ID:" + edgeInfo.getEdge_id() + " " + "Source: " + edgeInfo.getSrc_id() + " "
+ "Destination: " + edgeInfo.getDest_id() + " " + "EdgeLength: " + edgeInfo.getEdge_Length()));
List<Edge<Integer>> edges = new ArrayList<Edge<Integer>>();
edgesPart.foreach(edge -> {
Collection<? extends Edge<Integer>> collection = (Collection<? extends Edge<Integer>>) new Edge<Double>(
edge.getSrc_id(), edge.getDest_id(), edge.getEdge_Length());
edges.addAll(collection);
});
JavaRDD<Edge<Integer>> edgesRDD1 = javaSparkContext.parallelize(edges);
Graph<Integer, Integer> graph2 = Graph.fromEdgeTuples(edgesRDD1.rdd(), " ", StorageLevel.MEMORY_ONLY(),
StorageLevel.MEMORY_ONLY(), stringTag, stringTag);
}
}