Dear all,
I have a graph with 3 million nodes, made up of many connected components, most with 2, 3, or 4 nodes and some with up to 8,000 or 9,000 nodes. I want to split this large graph into subgraphs, each with a maximum of 5 nodes.
Each component that already satisfies this condition I leave as it is. For each component with more than 5 nodes, I find the edge with the smallest weight and remove it (the graph is weighted), then repeat on the resulting pieces.
However, I think my implementation is a complete disaster and the runtime will be longer than my life.
import networkx as nx

def return_ab(dc):
    # Return the key whose value is the minimum of the dict.
    mn = min(dc.values())
    return list(dc.keys())[list(dc.values()).index(mn)]

ls_gr_1 = []      # finished subgraphs (at most 5 nodes)
rem_edges_1 = []  # edges removed along the way

def prune(grph):
    if len(grph) <= 5:  # number of nodes (grph.size() would count edges)
        ls_gr_1.append(grph)
    else:
        ls = nx.get_edge_attributes(grph, 'DURATION')
        min_edge = return_ab(ls)  # lightest edge; renamed so it does not shadow min()
        unfrozen_graph = nx.Graph(grph)  # subgraph views are frozen, so take a copy
        rem_edges_1.append(min_edge)
        unfrozen_graph.remove_edge(*min_edge)
        for c in nx.connected_components(unfrozen_graph):
            prune(unfrozen_graph.subgraph(c))
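To give the full picture, a minimal driver for prune would look roughly like this (a sketch only; G stands for the full 3-million-node graph, with weights stored in the 'DURATION' edge attribute):

# Sketch of a driver loop: run prune on every connected component of G.
for c in nx.connected_components(G):
    prune(G.subgraph(c))

print(len(ls_gr_1), "subgraphs with at most 5 nodes")
print(len(rem_edges_1), "edges removed")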
I would appreciate a pointer toward how to achieve this in a more elegant and faster way...
EDIT:
I tried an iterative approach, which seems a tad faster, but it is still way too slow:
def traverse(graph):
    components = [graph.subgraph(c).copy() for c in nx.connected_components(graph)]
    large_ones = components  # stack of components still to be checked
    small_ones = []          # finished components (at most 5 nodes)
    while large_ones:
        graph_to_work = large_ones.pop()
        if len(graph_to_work) <= 5:
            small_ones.append(graph_to_work)
        else:
            ls = nx.get_edge_attributes(graph_to_work, 'DURATION')
            min_edge = return_ab(ls)  # lightest edge of this component
            unfrozen_graph = nx.Graph(graph_to_work)
            unfrozen_graph.remove_edge(*min_edge)
            new_components = [unfrozen_graph.subgraph(c).copy()
                              for c in nx.connected_components(unfrozen_graph)]
            large_ones.extend(new_components)
    return small_ones
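The direction I am currently considering is an untested sketch of the same edge-removal idea, not a finished solution (traverse_fast and the weight/max_nodes parameters are placeholder names): keep a single mutable copy of the graph, track components as node sets instead of copying a graph object for every split, and pick the lightest edge with min() over edges(data=...) instead of building an attribute dict each time:

def traverse_fast(graph, weight='DURATION', max_nodes=5):
    # One mutable copy of the input; components are tracked as node sets.
    work = nx.Graph(graph)
    stack = [set(c) for c in nx.connected_components(work)]
    small_ones = []
    while stack:
        nodes = stack.pop()
        if len(nodes) <= max_nodes:
            small_ones.append(nodes)
            continue
        view = work.subgraph(nodes)
        # Lightest edge of this component, without building an attribute dict.
        u, v, _ = min(view.edges(data=weight), key=lambda e: e[2])
        work.remove_edge(u, v)
        # Removing one edge can only split this component; re-split only it.
        for c in nx.connected_components(work.subgraph(nodes)):
            stack.append(set(c))
    return [work.subgraph(s).copy() for s in small_ones]

The idea behind it is that removing an edge can only affect the component it belongs to, so only that node set needs to be re-examined. Is that a reasonable direction, or is there a better way to do this in networkx?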