I have 473639 nodes and 995863 parent->child relations stored in a MySQL table.
I have tried both normal and batch operations to fetch the data and create the nodes and relations, but both types of operation are slow. Is there any workaround to make this process faster?
The code is given below:
import MySQLdb as my
from py2neo import neo4j, node, rel
def conn(query):
    """Open a MySQL connection, execute ``query`` and return the live handles.

    Connects to the ``localdb`` database on ``localhost`` as user ``root``,
    creates a cursor on that connection and executes ``query`` on it.

    Args:
        query: SQL statement passed unchanged to ``cursor.execute``.

    Returns:
        A ``(db, cur)`` tuple: the open connection and the cursor on which
        ``query`` was executed. The caller is responsible for closing both.

    Raises:
        Whatever ``my.connect``, ``db.cursor`` or ``cur.execute`` raise. If the
        failure happens after the connection was opened, the connection is
        closed before the error propagates.
    """
    db = my.connect(host='localhost',
                    user='root',
                    passwd='root',
                    db='localdb')
    try:
        cur = db.cursor()
        cur.execute(query)
    except Exception:
        # The caller never receives ``db`` on this path, so it could not
        # close the connection itself; release it here instead of leaking it.
        db.close()
        raise
    return db, cur
# Statement that loads the rows to import. The FROM keyword is required:
# without it MySQL rejects the statement as a syntax error.
query = 'select * from table1'
db, cur = conn(query)
# Cache of nodes that were already created, filled and read by create_node.
d = dict()
# Graph handle; no URI is passed, so the library's default is used.
graph = neo4j.GraphDatabaseService()
# Write batch bound to the graph, used only by the commented-out batch variant.
batch = neo4j.WriteBatch(graph)
def create_node(a):
    """Return the graph node for ``a``, creating it on first use.

    Results are cached in the module-level dict ``d`` so that each distinct
    ``a`` triggers at most one successful ``graph.create`` call.

    Args:
        a: Hashable key identifying the node; ``str(a)`` is stored as the
            node's ``name`` property.

    Returns:
        The value returned by ``graph.create`` for this key (also stored in
        ``d[a]``).

    Raises:
        Any exception raised by ``graph.create``. Nothing is cached for ``a``
        in that case, so a later call retries the creation.
    """
    if a in d:
        return d[a]
    # Errors are deliberately not caught here: with a failed creation there is
    # no node to return, and swallowing the error would only replace it with
    # an unrelated "variable referenced before assignment" failure.
    # NOTE(review): in py2neo 1.x graph.create appears to return a list of the
    # created entities rather than a single node -- confirm what gets cached.
    A = graph.create(node(name=str(a)))
    # for batch operation
    # A = batch.create(node(name=str(a)))
    d[a] = A
    return A
cnt = 1  # NOTE(review): not used anywhere in the visible code
# (parent, child) node pairs collected in the first pass. The list has to
# exist before the loop below appends to it.
rels = []
# create node
rows = cur.fetchall()
# Every row is in memory now, so the MySQL handles can be released before the
# (long-running) graph import starts.
cur.close()
db.close()
for row in rows:
    a, b = get_cat(row[0]), get_cat(row[1])
    try:
        A, B = create_node(a), create_node(b)
        rels.append((A, B))
    except Exception as e:
        # Best effort: report the failing pair and continue with the next row.
        print(e)
# create relations
for a, b in rels:
    graph.create(rel(a, "is parent of", b))
    # for batch operation
    # batch.create(node(name=str(a)))
# res = batch.submit()
# print res
print('end')