I have written some code to calculate weighted global efficiency, but it is taking too long to run. I need to either make the code much more efficient or find a more efficient way of calculating it for large data sets (up to 6000 points).
I have reworked the code a lot and tried igraph, but it has no built-in function for weighted global efficiency, and nothing has made it fast enough to get the calculations done.
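For reference, weighted global efficiency can in principle still be computed by hand from igraph's weighted distance matrix. A minimal sketch of that idea, assuming python-igraph (Graph.distances accepts a weights argument; it was called shortest_paths in older releases, and global_efficiency_weighted_ig is just an illustrative name):

import igraph as ig
import numpy as np

def global_efficiency_weighted_ig(adj):
    # adj: symmetric dense adjacency matrix (numpy array); zeros mean "no edge",
    # and weights are treated as distances, as in the NetworkX code below
    g = ig.Graph.Weighted_Adjacency(adj.tolist(), mode="undirected", attr="weight")
    d = np.asarray(g.distances(weights="weight"))  # all-pairs weighted shortest paths
    np.fill_diagonal(d, np.inf)                    # exclude u == v pairs
    n = d.shape[0]
    return (1.0 / d).sum() / (n * (n - 1)) if n > 1 else 0.0  # 1/inf == 0 for unreachable pairs

My current code is all displayed below.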
import networkx as nx
from networkx.algorithms.efficiency import global_efficiency
from networkx.exception import NetworkXNoPath
import pandas as pd
from tqdm import tqdm
from itertools import permutations
import time
from multiprocessing import Pool, cpu_count
def efficiency_weighted(G, u, v, weight='weight'):
    try:
        eff = 1 / nx.shortest_path_length(G, u, v, weight=weight)  # weighted shortest-path distance
    except NetworkXNoPath:
        eff = 0  # unreachable pairs contribute zero efficiency
    return eff
def global_efficiency_weighted(G):
    n = len(G)
    denom = n * (n - 1)  # number of ordered node pairs
    if denom != 0:
        g_eff = sum(efficiency_weighted(G, u, v, weight='weight')
                    for u, v in permutations(G, 2)) / denom
    else:
        g_eff = 0
    return g_eff
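# The function above implements
#   E_glob = (1 / (n * (n - 1))) * sum over u != v of 1 / d(u, v)
# where d(u, v) is the weighted shortest-path length and unreachable
# pairs contribute 0.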
lols = [pd.read_csv("lobe2 {}.csv".format(i)).values.tolist() for i in range(1, 7)]
lol1, lol2, lol3, lol4, lol5, lol6 = lols
combos = lol1 + lol2 + lol3 + lol4  # lists to be used for deletion in the matrix
datasafe = pd.read_csv("b1.csv", index_col=0)
## uncomment this section for sample benchmarking
#size = 25
#subset = [c[0] for c in combos[0:size]]
#datasafe = datasafe.loc[subset, :]
#datasafe = datasafe[subset]
#combos = combos[0:size]
################################
########## Single core
################################
tic = time.time()
GE_list = []
for combo in tqdm(combos):
    df_temp = datasafe.copy()
    df_temp.loc[combo, :] = 0  # zero out this combo's rows
    df_temp[combo] = 0         # zero out this combo's columns
    g = nx.from_pandas_adjacency(df_temp)
    ge = global_efficiency_weighted(g)
    # ge = global_efficiency(g)  # uncomment to test the non-weighted version
    GE_list.append(ge)
toc = time.time()
single = toc - tic
print("results for single core")
print(GE_list)
################################
########## Multi core
################################
def multi_global(datasafe, combo):
    df_temp = datasafe.copy()
    df_temp.loc[combo, :] = 0
    df_temp[combo] = 0
    g = nx.from_pandas_adjacency(df_temp)  # optimise by zeroing on the adjacency matrix
    ge = global_efficiency_weighted(g)
    return ge
tic = time.time()
cpu = cpu_count() - 1
pool = Pool(processes=cpu)
# NOTE: pool.apply blocks until each call returns, so this loop actually
# runs the jobs one at a time rather than in parallel
results = [pool.apply(multi_global, args=(datasafe, combo)) for combo in tqdm(combos)]
pool.close()
pool.join()
toc = time.time()
multi = toc - tic
################################
########## Multi core async
################################
def multi_global_as(datasafe, combo):
    df_temp = datasafe.copy()
    df_temp.loc[combo, :] = 0
    df_temp[combo] = 0
    g = nx.from_pandas_adjacency(df_temp)  # optimise by zeroing on the adjacency matrix
    ge = global_efficiency_weighted(g)
    pbar.update(1)  # each forked worker updates its own copy of the bar, so progress is approximate
    return combo, ge
tic = time.time()
cpu = cpu_count() - 1
pool = Pool(processes=cpu)
pbar = tqdm(total=int(len(combos) / cpu))  # rough per-worker total
results = [pool.apply_async(multi_global_as, args=(datasafe, combo)) for combo in combos]
res = [result.get() for result in results]
pool.close()
pool.join()
pbar.close()
toc = time.time()
multi_as = toc - tic
print("results for # cpu: " + str(cpu))
print(res)  # print the gathered values rather than the AsyncResult objects
print("time for single core: "+str(single))
print("time for multi core: "+str(multi))
print("time for multi async core: "+str(multi_as))
The results are accurate in calculating weighted global efficiency; however, it is taking far too long.
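My suspicion is that the per-pair calls to nx.shortest_path_length, plus the DataFrame copy and graph rebuild on every combo, are the bottleneck. A minimal sketch of one possible replacement, assuming SciPy's compiled all-pairs Dijkstra (scipy.sparse.csgraph.dijkstra) and the same weights-as-distances convention as above (global_efficiency_scipy is just an illustrative name):

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import dijkstra

def global_efficiency_scipy(adj):
    # adj: dense adjacency matrix (numpy array); zeros mean "no edge"
    d = dijkstra(csr_matrix(adj), directed=False)  # all-pairs weighted shortest paths in compiled code
    n = d.shape[0]
    if n <= 1:
        return 0.0
    np.fill_diagonal(d, np.inf)  # exclude u == v pairs; unreachable pairs are already inf
    return (1.0 / d).sum() / (n * (n - 1))  # 1/inf == 0, matching the NetworkXNoPath case

With something like this, each combo could be handled by zeroing rows and columns of a NumPy copy directly (arr[idx, :] = 0; arr[:, idx] = 0), which would also avoid the per-iteration DataFrame copy and graph rebuild. Is this a sound direction, or is there a better way?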