Here you go, Chaitanya:
class ThinGraph(nx.Graph):
    """Graph subclass for low-memory use when edge weights are not needed.

    ``edge_attr_dict_factory`` is pointed at a method that hands back the one
    class-level ``all_edge_dict`` rather than creating a new dict each time
    it is called.
    """

    # The single attribute dict returned by single_edge_dict.
    all_edge_dict = {"weight": 1}

    def single_edge_dict(self):
        """Return the shared edge attribute dict (never a fresh copy)."""
        return self.all_edge_dict

    # Use the shared-dict method as the factory for edge attribute dicts.
    edge_attr_dict_factory = single_edge_dict
# Create a blank graph: one node per customer ID, phone and email, with an
# edge from each customer ID to its phone and to its email.
G = ThinGraph()

# Stream the rows straight into G instead of first collecting every node and
# edge in temporary lists (those lists duplicated the whole data set in
# memory). Zipping the three columns also avoids df.iterrows(), which builds
# a Series per row and can change dtypes across a row.
# NOTE(review): missing phone/email values would be added as nodes as well --
# confirm the data has none, or filter them out before this loop.
for cust_id, phone, email in zip(df["cust_id"], df["phone"], df["email"]):
    # Nodes are added in the order customer, phone, email for each row.
    G.add_node(cust_id, label="Customer ID")
    G.add_node(phone, label="Phone")
    G.add_node(email, label="Email")
    # Customer-to-phone edge first, then customer-to-email.
    G.add_edge(cust_id, phone)
    G.add_edge(cust_id, email)

# Run the connected components algorithm and number the components 0, 1, ...
components = nx.connected_components(G)
comp_dict = dict(enumerate(components))

# Map every node to the index of the component that contains it and store
# that index on the node as the "component" attribute.
attr = {n: comp_id for comp_id, nodes in comp_dict.items() for n in nodes}
nx.set_node_attributes(G, attr, "component")

# Examine the result
G.nodes(data=True)
For your given data, you will get the following output:
NodeDataView({1: {'label': 'Customer ID', 'component': 0}, 678: {'label': 'Phone', 'component': 0}, 'a': {'label': 'Email', 'component': 0}, 2: {'label': 'Customer ID', 'component': 0}, 'b': {'label': 'Email', 'component': 0}, 3: {'label': 'Customer ID', 'component': 0}, 987: {'label': 'Phone', 'component': 0}, 4: {'label': 'Customer ID', 'component': 1}, 456: {'label': 'Phone', 'component': 1}, 'd': {'label': 'Email', 'component': 1}, 5: {'label': 'Customer ID', 'component': 1}, 654: {'label': 'Phone', 'component': 1}, 7: {'label': 'Customer ID', 'component': 1}, 'f': {'label': 'Email', 'component': 1}})