I'm working with a neo4j graph, using py2neo.
I understand that neo4j only supports simple properties, but I have a use case where I need to store dictionaries and pandas dataframes with a large number of nodes.
For dictionaries I can encode the dictionary as json and store it as a Node property, i.e. Node["dict"] = json.dumps(dict)
and then json.loads(Node["dict"])
to reconstruct the dictionary when retrieving the node.
I could do the same with the pandas data frame (i.e. Node["df"] = json.dumps(df.to_dict())
), but it doesn't seem a particularly efficient way of doing it.
Does anyone have suggestions on the best practice for working with a fairly large database of Python objects and neo4j? I'd like to be able to quickly access the Python objects from the neo4j database and work with/update them.
Example below uses the simple json.loads
approach as a starter.
from py2neo import Graph, Node, Relationship, NodeMatcher, RelationshipMatcher
import json
import logging
import pandas as pd
# Root-logger setup: timestamped messages at DEBUG level for the whole script.
logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.DEBUG)
# Bolt connection to the Neo4j server.
# NOTE(review): recent py2neo versions expect auth=("user", "pass") or user=/password=
# keywords rather than username= — confirm against the installed py2neo release.
db = Graph("bolt://192.168.X.X:7687", username = "XXX", password = "XXX")
class CurrentUser():
    """Wraps the :User node for one user and mirrors Python objects onto it.

    Dictionaries and an accumulated pandas DataFrame are JSON-encoded and
    stored as plain string properties on the node (Neo4j only supports
    simple property types).
    """

    def __init__(self, username, sender_id):
        # Parameterized Cypher instead of str.format: avoids Cypher
        # injection and breakage when the username contains quotes.
        query = """MATCH (u:User)
                   WHERE u.SenderID = $sender_id and u.UserName = $username
                   RETURN u"""
        self.user_node = db.evaluate(query, sender_id=sender_id,
                                     username=username)
        # In-memory dataframe mirrored to the 'df' property on each update.
        self.DF = pd.DataFrame()

    def add_dict(self, dict):
        """JSON-encode *dict* and persist it on the node's 'dict' property.

        NOTE: the parameter name shadows the builtin ``dict``; kept for
        backward compatibility with keyword callers.
        """
        self.user_node['dict'] = json.dumps(dict)
        db.push(self.user_node)

    def add_dataframe(self, s):
        """Append row/frame *s* to the cached DataFrame and persist it.

        Uses pd.concat because DataFrame.append was deprecated in pandas
        1.4 and removed in 2.0. A Series is promoted to a one-row frame
        first, preserving its name as the row index (matching the old
        append(..., ignore_index=False) behavior).
        """
        piece = s.to_frame().T if isinstance(s, pd.Series) else s
        self.DF = pd.concat([self.DF, piece])
        self.user_node['df'] = json.dumps(self.DF.to_dict())
        db.push(self.user_node)
def create_nodes():
    """Seed the graph with the single User node the demo below expects."""
    statement = """CREATE(u1:User {UserName: "Dave", SenderID: 10})"""
    db.run(statement)
# Demo driver: create the node, wrap it, and store a dict and a dataframe.
create_nodes()
user = CurrentUser("Dave", 10)
# Renamed from `dict` — shadowing the builtin breaks later dict() calls.
payload = {"key": 10}
user.add_dict(payload)
pd_dict = {"val1": 1, "val2": 2}
s = pd.Series(pd_dict, name=2)
user.add_dataframe(s)