I am trying to create a graph in Apache Spark GraphX using Scala. My code so far is:
import org.apache.spark._
import org.apache.spark.graphx._

// One edge per CSV line: source,destination,sourceLayer,destLayer (weight defaults to 1)
case class EdgesCl(Source: Long, Destination: Long, SourceLayer: Long, DestLayer: Long, Weight: Int = 1)

object actual {
  // Parse a CSV line such as "1,2,0,0" into an EdgesCl
  def parseEdgesCl(str: String): EdgesCl = {
    val line = str.split(",")
    EdgesCl(line(0).toLong, line(1).toLong, line(2).toLong, line(3).toLong)
  }

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("SparkCommunityDetection")
    val sc = new SparkContext(conf)

    // Create an RDD with the network data
    val textRDD = sc.textFile("/home/input_Files/test_net.csv")
    val EdgesClRDD = textRDD.map(parseEdgesCl).cache()

    // Vertices: (vertex id, source layer)
    val nodes = EdgesClRDD.map(node => (node.Source, node.SourceLayer)).distinct
    nodes.take(1) // fetch a sample vertex

    val edges_lines = EdgesClRDD.map(edge => ((edge.Source, edge.Destination), edge.SourceLayer, edge.DestLayer))

    // Default vertex attribute, used for vertices that appear only in edges
    val noedge = "noedge"

    // Edges carry the source layer as their attribute
    val edges = edges_lines.map { case ((source, dest), s_layer, d_layer) => Edge(source, dest, s_layer) }

    // Defining the graph
    val graph = Graph(nodes, edges, noedge)
  }
}
I get the following error:
Error:(67, 23) type mismatch;
found : org.apache.spark.rdd.RDD[(Long, Long)]
required: org.apache.spark.rdd.RDD[(org.apache.spark.graphx.VertexId, Any)]
Note: (Long, Long) <: (org.apache.spark.graphx.VertexId, Any), but class RDD is invariant in type T.
You may wish to define T as +T instead. (SLS 4.5)
Error occurred in an application involving default arguments.
val graph = Graph(nodes, edges, noedge)
How can I convert "nodes" to the correct data type so that I can build the graph?
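From the note about invariance, my guess is that the default vertex attribute "noedge" is a String while the node attributes are Long, so the compiler widens the vertex attribute type to Any. Would something like the following sketch be the right fix? (The names typedNodes, defaultLayer, and typedGraph are mine, and -1L is an arbitrary sentinel I picked; I am assuming the layer should stay a Long.)

import org.apache.spark.rdd.RDD

// Pin the vertex attribute type to Long (VertexId is an alias for Long)
val typedNodes: RDD[(VertexId, Long)] =
  EdgesClRDD.map(node => (node.Source, node.SourceLayer)).distinct
// The default attribute must have the same type as the vertex attributes,
// so use a Long sentinel (-1L, my arbitrary choice) instead of the String "noedge"
val defaultLayer: Long = -1L
val typedGraph: Graph[Long, Long] = Graph(typedNodes, edges, defaultLayer)

My understanding is that GraphX fills in defaultLayer for any vertex id that appears in edges but is missing from typedNodes, so the sentinel would only show up on such vertices.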