3

I have two DataFrames with the following schemas:

     edges
     |-- src: string (nullable = true) 
     |-- dst: string (nullable = true) 
     |-- relationship: struct (nullable = false) 
     | |-- business_id: string (nullable = true) 
     | |-- normalized_influence: double (nullable = true)

    vertices
    |-- id: string (nullable = true) 
    |-- state: boolean (nullable = true)

To build a GraphX graph, I converted these DataFrames in the following way:

import org.apache.spark.graphx._
import scala.util.hashing.MurmurHash3

// Mirrors the `relationship` struct column of the edges DataFrame.
case class Relationship(business_id: String, normalized_influence: Double)

// One row of the edges DataFrame: string endpoints plus the relationship payload.
case class MyEdge(src: String, dst: String, relationship: Relationship)

// Turn every edge row into a GraphX Edge. The string endpoints are hashed with
// MurmurHash3 and widened to Long so they can be used as vertex ids.
// NOTE(review): stringHash yields a 32-bit value, so two different strings could
// in principle map to the same id — confirm this is acceptable for the data set.
val edgesRDD: RDD[Edge[Relationship]] =
  communityEdgeDF.as[MyEdge].rdd.map { row =>
    val srcId: VertexId = MurmurHash3.stringHash(row.src).toLong
    val dstId: VertexId = MurmurHash3.stringHash(row.dst).toLong
    Edge(srcId, dstId, row.relationship)
  }

// One row of the vertices DataFrame (columns `id` and `state`).
case class MyVertex(id: String, state: Boolean)

// Pair every vertex row with a numeric id obtained by hashing its string id
// (same MurmurHash3 scheme as used for the edge endpoints), and keep the
// original (id, state) pair as the vertex attribute.
val verticesRDD: RDD[(VertexId, (String, Boolean))] =
  communityVertexDF.as[MyVertex].rdd.map { row =>
    val vertexId: VertexId = MurmurHash3.stringHash(row.id).toLong
    val attribute = (row.id, row.state)
    (vertexId, attribute)
  }

// Assemble the property graph from the hashed vertices and edges.
// NOTE(review): no default vertex attribute is passed here. GraphX's Graph.apply
// appears to create a vertex for every id that occurs in an edge, and to give the
// ones missing from verticesRDD the default attribute (null unless specified), so
// null attributes would indicate edge endpoints with no matching row in
// verticesRDD — confirm against the GraphX documentation and the input data.
val graphX = Graph(verticesRDD, edgesRDD) 

This is part of the contents of the vertices RDD:

res6: Array[(org.apache.spark.graphx.VertexId, (String, Boolean))] = Array((1874415454,(KRZALzi0ZgrGYyjZNg72_g,false)), (1216259959,(JiFBQ_-vWgJtRZEEruSStg,false)), (-763896211,(LZge-YpVL0ukJVD2nw5sag,false)), (-2032982683,(BHP3LVkTOfh3w4UIhgqItg,false)), (844547135,(JRC3La2fiNkK0VU7qZ9vyQ,false)) 

and this is part of the edges RDD:

res3: Array[org.apache.spark.graphx.Edge[Relationship]] = Array(Edge(-268040669,1495494297,Relationship(cJWbbvGmyhFiBpG_5hf5LA,0.0017532149785518423)), Edge(-268040669,-125364603,Relationship(cJWbbvGmyhFiBpG_5hf5LA,0.0017532149785518423))

But when I run this:

// Fetch the graph's vertices as a local array to inspect their attributes.
graphX.vertices.collect

I get this unexpected output, in which most vertices have a null attribute:

 Array((1981723824,null), (-333497649,null), (-597749329,null), (451246392,null), (-1287295481,null), (1013727024,null), (-194805089,null), (1621180464,null), (1874415454,(KRZALzi0ZgrGYyjZNg72_g,false)), (1539311488,null)

What is the problem? Did I build the Graph incorrectly?

alukard990
  • 811
  • 2
  • 9
  • 14

0 Answers0