2

This is my transaction data. It shows the transactions made from the accounts in from column to the accounts in to column with the date and the amount information

data 

id          from    to          date        amount  
<int>       <chr>   <chr>       <date>      <dbl>
19521       6644    6934        2005-01-01  700.0
19524       6753    8456        2005-01-01  600.0
19523       9242    9333        2005-01-01  1000.0
…           …       …           …           …
1056317     7819    7454        2010-12-31  60.2
1056318     6164    7497        2010-12-31  107.5
1056319     7533    7492        2010-12-31  164.1

I want to calculate closeness centrality measure on the networks of transactions made in the last 6 month prior to the date each particular transaction was made and want to save this information as a new column in the original data.

example data I'll use here is:

structure(list(id = c(83324L, 87614L, 88898L, 89874L, 94765L, 
100277L, 101587L), from = c("5370", "7816", "8046", "5492", "8756", 
"5370", "9254"), to = c("9676", "5370", "5370", "5370", "5370", 
"9105", "5370"), date = structure(c(13391, 13400, 13404, 13409, 
13428, 13452, 13452), class = "Date"), amount = c(261.1, 16400, 
3500, 2700, 19882, 182, 14.6)), row.names = c(NA, -7L), class = "data.frame")

Now, this following code works very well to accomplish this in a small dataset:

library(tnet)
closeness_fnc <- function(data){
  accounts <- data[date == max(date),from]
  id <- data[date == max(date),id]  
  
  # for directed networks
  df <- data %>% group_by(from, to) %>% mutate(weights = sum(amount)) %>% select(from, to, weights) %>% distinct
  cl <- closeness_w(df, directed = T, gconly=FALSE, alpha = 0.5) 
  
  list(
    id = id,
    closeness_directed = cl[,"n.closeness"][accounts]
  )

}

network_data <- data[, closeness_fnc(data[(date >= end_date - 180) & (date <= end_date)]), .(end_date = date)] %>% select(-end_date)
# adding this info into the original data
data <- merge(x = data, y = network_data, by = "id")

So, the output is as I expected:

# data
id      from    to      date        amount  closeness_directed 
<int>   <chr>   <chr>   <date>      <dbl>   <dbl> 
83324   5370    9676    2006-08-31  261.1   1.00000000
87614   7816    5370    2006-09-09  16400.0 0.98744695
88898   8046    5370    2006-09-13  3500.0  0.35329017
89874   5492    5370    2006-09-18  2700.0  0.25176754
94765   8756    5370    2006-10-07  19882.0 0.39233504
100277  5370    9105    2006-10-31  182.0   0.07167582
101587  9254    5370    2006-10-31  14.6    0.02390589

However, since my data has over 1 million rows, this code will take more than a day to complete(it runs for more than 12 hours and hasn't yet finished).

I had a similar running time problem here and I want to apply the same logic to this code. So, I modified my code as follows:

library(tnet)
closeness_fnc <- function(data){
  accounts <- data[date == max(date),from]
  id <- data[date == max(date),id]  
  
  # for directed networks
  df <- data %>% group_by(from, to) %>% mutate(weights = sum(amount)) %>% select(from, to, weights) %>% distinct
  cl <- closeness_w(df, directed = T, gconly=FALSE, alpha = 0.5) 
  
  closeness_directed <- cl[,"n.closeness"][accounts]
  closeness_directed <- as.data.frame(closeness_directed)
  closeness_directed$from <- rownames(closeness_directed)
  rownames(closeness_directed) <- NULL

  return(closeness_directed)
}

# this is the approach given in the link I provided:
setDT(data)[, date_minus_180 := date - 180]
data[, ':=' (closeness_directed = data[data, closeness_fnc(data), 
     on = .(from, date <= date, date >= date_minus_180), by = .EACHI]$closeness_directed
     )] %>% select(-date_minus_180)

however, that won't work obviously since

data[data, closeness_fnc(data), 
     on = .(from, date <= date, date >= date_minus_180), by = .EACHI]

gives the output

from   date         date     closeness_directed      from   
<chr>  <date>      <date>       <dbl>                <chr>

5370    2006-08-31  2006-03-04  0.07167582           5370
5370    2006-08-31  2006-03-04  0.02390589           9254
7816    2006-09-09  2006-03-13  0.07167582           5370
7816    2006-09-09  2006-03-13  0.02390589           9254
8046    2006-09-13  2006-03-17  0.07167582           5370
8046    2006-09-13  2006-03-17  0.02390589           9254
5492    2006-09-18  2006-03-22  0.07167582           5370
5492    2006-09-18  2006-03-22  0.02390589           9254
8756    2006-10-07  2006-04-10  0.07167582           5370
8756    2006-10-07  2006-04-10  0.02390589           9254
1-10 of 14 rows

So, now how can I adjust the code here to solve the problem?

A larger dataset

structure(list(id = c(19521L, 19522L, 19523L, 19524L, 19525L, 
19526L, 19527L, 19528L, 19529L, 19530L, 19531L, 0L, 19532L, 19533L, 
19534L, 21971L, 21972L, 21973L, 21974L, 21975L, 21976L, 21977L, 
21978L, 21979L, 21980L, 21981L, 1L, 21761L, 21762L, 21763L, 21764L, 
21765L, 21766L, 21767L, 21982L, 21983L, 21984L, 21768L, 21769L, 
21770L, 21771L, 21772L, 21773L, 2L, 21774L, 21775L, 21776L, 21777L, 
21778L, 21779L, 21780L, 21781L, 21782L, 3L, 21783L, 21784L, 21785L, 
21786L, 21787L, 21788L, 21789L, 21790L, 21791L, 21792L, 21793L, 
21794L, 21795L, 21796L, 4L, 21797L, 21798L, 21799L, 21800L, 21801L, 
21802L, 21803L, 21804L, 21805L, 21806L, 21807L, 21808L, 21809L, 
21810L, 21811L, 21812L, 21813L, 21814L, 21815L, 5L, 21816L, 21817L, 
21818L, 21819L, 21820L, 21821L, 21822L, 21823L, 21824L, 21825L, 
21826L, 21827L, 21828L, 21829L, 21830L, 6L, 21831L, 21832L, 21833L, 
21834L, 21835L, 21836L, 21837L, 21838L, 7L, 21839L, 21840L, 21841L, 
21842L, 21843L, 21844L, 21845L, 21846L, 21847L, 21848L, 21849L, 
21850L, 21851L, 21852L, 21853L, 21854L, 21855L, 21856L, 21857L, 
8L, 21858L, 21859L, 9L, 10L, 21860L, 21861L, 21862L, 21863L, 
21864L, 21865L, 21866L, 21867L, 21868L, 21869L, 21870L, 21871L, 
21872L, 21873L, 21874L, 21875L, 21876L, 21877L, 21878L, 21879L, 
21880L, 21881L, 21882L, 21883L, 21884L, 21885L, 21886L, 21887L, 
21888L, 21889L, 21890L, 21891L, 21892L, 21893L, 21894L, 21895L, 
21896L, 21897L, 21898L, 21899L, 21900L, 11L, 21901L, 21902L, 
21903L, 21904L, 21905L, 21906L, 21907L, 21908L, 21909L, 12L, 
21910L, 21911L, 21912L, 21913L, 21914L, 21915L, 21916L, 21917L, 
21918L, 21919L, 13L, 21920L, 21921L, 21922L, 21923L, 21924L, 
21925L, 21926L, 21927L, 21928L, 21929L, 21930L, 21931L, 21932L, 
21933L, 21934L, 21935L, 21936L, 14L, 21937L, 21938L, 21939L, 
21940L, 21941L, 21942L, 21957L, 21958L, 21959L, 21960L, 21961L, 
21962L, 21963L, 21964L, 15L, 21965L, 21966L, 21967L, 21968L, 
21969L, 21970L, 21985L, 21986L, 21987L, 21988L, 21989L, 21990L, 
21991L, 21992L, 21993L, 21994L, 21995L, 21996L, 16L, 17L, 21551L, 
21552L, 21553L, 21554L, 21555L, 21556L, 21557L, 21558L, 21559L, 
21560L, 21561L, 21562L, 21563L, 21564L, 21565L, 21566L, 21567L, 
21997L, 21998L, 18L, 21568L, 21569L, 21570L, 21571L, 21572L, 
21573L, 21574L, 21575L, 21576L, 21577L, 21578L, 21579L, 21580L, 
21581L, 19L, 21582L, 21583L, 21584L, 21585L, 21586L, 21587L, 
21588L, 21589L, 21590L, 21591L, 21592L, 20L, 21593L, 21594L, 
21595L, 21596L, 21597L, 21598L, 21599L, 21600L, 21601L, 21602L, 
21603L, 21604L, 21605L, 21606L, 21L, 21607L, 21608L, 21609L, 
21610L, 21611L, 21612L, 21613L, 21614L, 21615L, 21616L, 21617L, 
21618L, 21619L, 21620L, 21621L, 21622L, 21623L, 21624L, 21625L, 
21626L, 22L, 21627L, 21628L, 21629L, 21630L, 21631L, 21632L, 
21633L, 21634L, 21635L, 21636L, 21637L, 21638L, 21639L, 21640L, 
21641L, 21642L, 21643L, 21644L, 21645L, 23L, 21646L, 21647L, 
21648L, 21649L, 21650L, 21651L, 21652L, 21653L, 21654L, 21655L, 
21656L, 21657L, 21658L, 24L, 21659L, 21660L, 21661L, 21662L, 
21663L, 21664L, 21665L, 21666L, 21667L, 21668L, 21669L, 25L, 
21670L, 21671L, 21672L, 21673L, 21674L, 21675L, 21676L, 21677L, 
21678L, 21679L, 21680L, 21681L, 21682L, 21683L, 26L, 21684L, 
21685L, 21686L, 21687L, 21688L, 21689L, 21690L, 21691L, 21692L, 
21693L, 21694L, 21695L, 21696L, 21697L, 21698L, 21699L, 21700L, 
21701L, 21702L, 21703L, 27L, 21704L, 21719L, 21720L, 21721L, 
21722L, 21723L, 21724L, 21725L, 21726L, 21727L, 21728L, 21729L, 
21730L, 21731L, 21732L, 28L, 21733L, 21734L, 21735L, 21736L, 
21737L, 21738L, 21739L, 21740L, 29L, 21741L, 21742L, 21743L, 
21744L, 21745L, 21746L, 21747L, 21748L, 21749L, 21750L, 21751L, 
21752L, 21753L, 21754L, 21755L, 21756L, 21757L, 21758L, 30L, 
31L, 32L, 33L, 34L, 35L, 36L, 37L, 21229L, 21230L, 21231L, 21232L, 
21233L, 21234L, 21235L, 21236L, 21237L, 21238L, 21239L, 21240L, 
21241L, 21242L, 21243L, 21244L, 21245L, 21246L, 21247L, 21248L, 
21249L, 21250L, 21251L, 21252L, 21253L, 21254L, 21255L, 21256L, 
21257L, 21258L), from = c("6644", "9843", "9242", "6753", "7075", 
"8685", "5513", "6340", "6042", "5587", "7237", "5695", "9582", 
"8539", "7939", "9077", "8946", "5591", "8380", "5865", "7867", 
"9457", "6968", "7971", "6150", "9361", "9379", "8409", "9740", 
"7226", "7531", "6752", "7362", "6661", "5730", "5417", "9049", 
"7057", "6252", "9476", "6228", "8896", "7371", "8170", "7122", 
"6694", "5450", "9435", "5619", "8289", "9862", "5504", "6555", 
"9845", "7537", "9482", "6810", "8257", "8490", "6588", "9652", 
"7303", "5852", "5746", "9198", "6917", "8688", "9460", "9640", 
"7054", "8628", "7065", "9006", "6832", "6185", "8422", "6914", 
"7069", "7848", "8436", "5494", "6375", "5653", "8912", "9794", 
"8413", "6527", "9101", "5815", "6923", "8184", "6811", "8130", 
"6539", "8643", "6329", "7744", "8211", "9641", "8003", "5599", 
"8715", "7108", "9573", "8583", "5648", "6444", "5660", "8191", 
"9830", "5931", "7921", "6753", "8314", "7940", "6265", "6604", 
"6509", "5618", "5860", "6469", "9525", "5887", "6626", "7145", 
"6862", "5741", "9144", "9862", "9163", "7297", "7599", "8427", 
"8865", "9418", "8636", "6530", "9155", "6934", "8817", "9028", 
"5521", "5943", "7443", "9557", "8239", "6819", "9761", "5983", 
"6830", "6368", "5381", "8782", "8008", "9160", "9862", "8008", 
"9615", "6920", "6164", "6278", "9729", "8960", "6358", "5939", 
"8902", "9522", "7344", "9070", "6594", "8058", "6639", "7896", 
"6325", "7804", "9554", "9725", "8475", "7746", "7536", "9671", 
"9761", "5415", "6837", "8327", "9061", "8981", "9226", "5862", 
"7085", "8925", "6226", "6849", "8432", "9545", "5837", "5440", 
"9732", "8695", "7690", "5829", "9373", "7977", "6361", "7320", 
"7603", "6303", "7077", "7850", "5792", "9588", "9204", "8648", 
"8950", "7106", "6334", "6843", "7060", "9606", "5520", "9725", 
"9350", "7463", "8130", "7947", "9668", "9490", "6241", "8830", 
"6374", "9528", "7919", "8532", "6795", "6934", "8162", "9275", 
"8106", "8615", "9206", "8283", "6265", "7052", "7737", "8422", 
"7815", "9028", "7932", "6125", "6671", "7800", "9835", "5573", 
"7874", "8931", "6748", "8192", "6822", "6950", "8020", "8555", 
"8986", "7644", "5736", "8421", "6224", "8374", "8304", "9101", 
"8677", "9208", "7008", "6074", "9409", "6269", "9721", "9304", 
"9117", "5420", "9691", "7728", "8422", "8579", "7495", "9838", 
"8139", "9571", "5385", "5454", "9620", "7723", "9249", "7033", 
"7966", "5837", "9844", "5793", "5747", "6362", "6925", "9318", 
"6780", "6934", "7150", "6818", "7246", "5514", "9574", "7838", 
"5540", "6646", "6893", "6417", "8039", "8721", "8763", "6401", 
"6510", "7970", "7117", "6001", "7505", "7646", "5600", "6522", 
"8395", "5601", "5418", "6296", "8790", "7622", "9012", "8165", 
"7624", "5468", "9316", "9030", "7155", "5702", "7492", "8503", 
"9868", "6807", "6404", "9076", "7213", "8735", "7849", "8551", 
"9351", "6693", "6795", "9653", "9504", "6948", "9358", "9280", 
"8168", "5456", "9138", "8420", "9312", "8930", "6375", "8695", 
"7699", "6748", "5506", "9475", "5776", "5517", "5644", "8680", 
"5474", "7534", "9363", "9586", "6508", "6193", "5401", "8032", 
"8461", "9387", "5812", "7564", "5917", "5434", "5794", "7840", 
"9085", "8331", "7060", "7175", "6669", "8896", "6352", "7432", 
"9810", "8776", "6934", "6112", "8869", "8248", "9450", "6974", 
"7264", "7336", "6880", "7866", "7777", "7502", "5615", "9777", 
"7371", "9214", "6374", "6039", "7714", "9056", "8358", "8963", 
"8657", "8846", "9319", "7220", "7764", "8967", "8683", "9137", 
"6971", "9747", "7449", "8259", "5373", "7300", "6273", "8391", 
"7862", "5696", "6622", "5456", "9240", "7021", "7313", "7247", 
"6679", "8102", "6812", "9473", "6345", "7935", "9696", "5541", 
"8939", "5417", "6887", "8998", "7977", "9110", "8666", "6670", 
"8975", "7518", "5601", "7549", "7841", "8888", "5808", "9545", 
"9460", "9361", "9807", "6860", "9811", "5935", "8966", "8684", 
"5915", "8892", "8493", "7894", "6342", "6382", "8461", "7833", 
"7201", "7253", "6720", "6175", "9201", "5682", "5473", "7173", 
"6094", "8810", "5874", "6947", "8462", "6885", "6201"), to = c("6934", 
"9115", "9333", "8456", "6510", "7207", "6046", "7047", "6213", 
"9493", "6248", "7468", "8925", "6727", "6912", "6727", "9811", 
"9493", "9251", "6375", "6460", "6375", "8130", "5773", "6510", 
"6951", "6213", "6671", "6153", "6634", "9440", "8220", "8512", 
"8105", "8786", "5773", "6454", "5997", "8374", "7207", "6253", 
"9251", "8456", "7517", "6935", "6143", "8220", "9628", "5837", 
"9115", "6517", "9628", "8078", "6143", "6912", "7047", "6460", 
"7517", "6442", "9333", "6646", "5997", "8395", "6153", "9012", 
"6248", "7468", "8105", "6254", "9811", "7518", "6217", "6951", 
"8551", "9012", "5605", "6671", "7084", "8925", "5985", "8130", 
"5443", "8665", "8657", "8395", "6883", "6334", "8472", "6669", 
"5715", "5409", "8876", "8869", "9450", "5610", "6934", "6043", 
"7253", "6646", "7564", "6934", "5668", "6986", "7382", "6934", 
"8671", "6646", "8336", "9750", "8967", "9137", "8912", "5373", 
"9240", "6934", "8925", "6273", "6566", "6164", "9240", "6145", 
"7247", "7134", "5606", "9682", "5635", "8820", "8763", "7492", 
"5837", "6634", "8323", "6616", "6374", "8678", "7293", "6143", 
"8105", "7843", "6375", "7207", "5997", "9628", "9240", "9811", 
"5837", "8395", "8456", "9811", "9333", "9251", "6153", "6213", 
"6248", "9115", "8925", "6634", "6671", "8130", "6646", "9333", 
"6727", "6510", "6460", "8220", "9493", "9750", "6934", "6912", 
"6951", "7047", "9012", "9750", "5773", "7517", "7468", "8456", 
"7207", "6192", "9131", "6046", "7143", "7047", "6213", "6333", 
"7603", "6248", "9620", "6995", "9770", "5835", "8925", "5614", 
"8846", "8134", "7468", "8887", "8631", "9744", "9251", "6217", 
"6934", "7247", "8697", "6727", "5606", "9664", "6460", "6442", 
"8374", "6334", "9440", "9493", "9845", "7492", "5605", "8078", 
"9202", "6454", "5635", "8657", "8606", "8395", "9037", "5773", 
"6951", "6807", "9770", "8631", "9845", "8512", "6253", "6989", 
"6375", "7248", "8665", "8786", "8887", "5668", "6374", "6883", 
"9519", "8134", "6510", "5443", "6646", "6634", "5373", "7084", 
"6033", "8967", "8105", "9565", "9723", "8925", "7222", "6361", 
"8739", "8739", "6502", "9085", "5980", "5980", "5385", "5773", 
"7001", "9200", "7603", "7471", "9620", "5610", "6794", "9457", 
"8336", "6935", "5409", "5621", "5614", "9664", "7517", "7518", 
"6669", "6517", "6114", "7207", "9628", "9251", "8456", "8078", 
"6935", "6772", "9535", "8869", "7222", "7034", "6986", "6566", 
"8220", "7155", "7446", "9202", "6934", "9333", "6046", "9535", 
"8678", "6273", "6896", "7345", "9115", "8183", "6634", "6254", 
"7471", "9628", "9333", "9457", "9457", "9137", "6043", "8671", 
"6479", "6503", "5715", "7143", "5592", "6912", "7047", "6460", 
"7517", "6143", "9712", "8472", "7382", "6995", "6192", "7518", 
"6145", "8912", "6844", "7253", "7109", "8763", "5997", "5985", 
"6807", "6153", "6329", "7213", "8551", "7564", "7155", "6248", 
"7468", "8105", "5605", "6503", "8820", "5562", "8697", "7109", 
"9811", "6984", "6951", "8323", "9450", "9012", "6616", "5922", 
"9682", "9839", "8041", "5443", "9039", "8178", "7293", "8665", 
"8657", "8846", "7990", "8168", "7646", "8472", "9803", "8041", 
"8879", "9085", "8178", "7624", "8221", "5776", "8422", "9085", 
"8545", "8321", "5473", "6994", "6673", "6934", "7769", "5409", 
"6104", "8876", "7818", "8941", "5610", "7825", "7770", "6043", 
"7253", "8790", "7564", "8178", "8846", "6954", "7382", "6986", 
"6194", "8671", "9741", "5384", "8846", "8653", "6659", "9750", 
"9744", "9138", "9321", "7124", "8912", "5866", "7718", "5468", 
"7321", "6795", "6042", "6566", "6164", "9084", "6507", "9033", 
"6807", "9240", "6540", "6857", "8945", "7134", "5606", "9390", 
"9682", "6359", "8757", "8763", "8280", "7049", "6205", "7604", 
"9729", "7492", "6085", "8239", "6299", "9845", "9240", "8323", 
"6616", "6671", "6669", "8657", "7471", "9744", "5443", "5837", 
"8395", "8551", "8456", "8472", "8374", "5610", "9811", "9682", 
"9333", "9251", "9202", "7603", "6192", "6143", "6153", "6329", 
"6213", "6273", "6248", "7109", "7143", "8041", "8665", "8925", 
"9115", "6634", "6671"), date = structure(c(12784, 12784, 12784, 
12784, 12785, 12785, 12786, 12786, 12786, 12786, 12786, 12787, 
12787, 12787, 12787, 12788, 12788, 12788, 12788, 12789, 12789, 
12790, 12790, 12790, 12790, 12790, 12791, 12791, 12791, 12791, 
12791, 12791, 12791, 12791, 12791, 12791, 12791, 12792, 12792, 
12792, 12792, 12792, 12792, 12793, 12793, 12793, 12793, 12793, 
12794, 12794, 12794, 12794, 12794, 12795, 12795, 12795, 12795, 
12795, 12795, 12795, 12795, 12796, 12796, 12796, 12796, 12796, 
12796, 12796, 12797, 12797, 12797, 12797, 12797, 12797, 12797, 
12797, 12798, 12798, 12799, 12800, 12800, 12800, 12801, 12801, 
12801, 12802, 12802, 12802, 12803, 12803, 12804, 12804, 12804, 
12804, 12804, 12805, 12805, 12805, 12805, 12805, 12806, 12806, 
12806, 12806, 12807, 12807, 12807, 12807, 12807, 12807, 12808, 
12808, 12808, 12809, 12809, 12809, 12809, 12809, 12809, 12809, 
12810, 12810, 12810, 12810, 12810, 12811, 12811, 12811, 12811, 
12812, 12812, 12812, 12812, 12813, 12813, 12813, 12814, 12814, 
12814, 12814, 12814, 12814, 12814, 12814, 12814, 12814, 12814, 
12814, 12814, 12814, 12814, 12814, 12814, 12814, 12814, 12814, 
12814, 12814, 12814, 12814, 12814, 12814, 12814, 12814, 12814, 
12814, 12814, 12814, 12814, 12814, 12814, 12814, 12814, 12814, 
12814, 12814, 12814, 12815, 12815, 12816, 12816, 12816, 12816, 
12816, 12816, 12816, 12816, 12816, 12816, 12817, 12817, 12817, 
12817, 12817, 12817, 12817, 12818, 12818, 12818, 12818, 12819, 
12819, 12819, 12819, 12819, 12819, 12819, 12819, 12819, 12819, 
12819, 12819, 12819, 12819, 12819, 12819, 12819, 12819, 12820, 
12820, 12820, 12820, 12820, 12820, 12820, 12820, 12820, 12820, 
12820, 12820, 12820, 12820, 12820, 12821, 12821, 12821, 12821, 
12821, 12821, 12821, 12821, 12821, 12821, 12821, 12821, 12821, 
12821, 12821, 12821, 12821, 12821, 12821, 12822, 12822, 12822, 
12822, 12822, 12822, 12822, 12822, 12822, 12822, 12822, 12822, 
12822, 12822, 12822, 12822, 12822, 12822, 12822, 12822, 12822, 
12823, 12823, 12823, 12823, 12823, 12823, 12823, 12823, 12823, 
12823, 12823, 12823, 12823, 12823, 12823, 12824, 12824, 12824, 
12824, 12824, 12824, 12824, 12824, 12824, 12824, 12824, 12824, 
12825, 12825, 12825, 12825, 12825, 12825, 12825, 12825, 12825, 
12825, 12825, 12825, 12825, 12825, 12825, 12826, 12826, 12826, 
12826, 12826, 12826, 12826, 12826, 12826, 12826, 12826, 12826, 
12826, 12826, 12826, 12826, 12826, 12826, 12826, 12826, 12826, 
12827, 12827, 12827, 12827, 12827, 12827, 12827, 12827, 12827, 
12827, 12827, 12827, 12827, 12827, 12827, 12827, 12827, 12827, 
12827, 12827, 12828, 12828, 12828, 12828, 12828, 12828, 12828, 
12828, 12828, 12828, 12828, 12828, 12829, 12829, 12830, 12830, 
12830, 12830, 12830, 12830, 12831, 12831, 12831, 12831, 12831, 
12831, 12832, 12832, 12832, 12832, 12832, 12832, 12832, 12832, 
12833, 12833, 12833, 12833, 12833, 12833, 12833, 12834, 12834, 
12834, 12834, 12834, 12834, 12834, 12834, 12834, 12834, 12834, 
12835, 12835, 12835, 12835, 12835, 12836, 12836, 12836, 12836, 
12836, 12837, 12837, 12837, 12837, 12837, 12837, 12837, 12837, 
12837, 12837, 12838, 12838, 12838, 12838, 12838, 12838, 12839, 
12839, 12839, 12839, 12839, 12839, 12839, 12839, 12839, 12840, 
12840, 12840, 12840, 12840, 12840, 12840, 12841, 12841, 12841, 
12841, 12841, 12841, 12841, 12841, 12841, 12841, 12841, 12841, 
12842, 12842, 12842, 12842, 12842, 12842, 12842, 12842, 12842, 
12842, 12842, 12842, 12842, 12842, 12842, 12842, 12842, 12842, 
12842, 12842, 12842, 12842, 12842, 12842, 12842, 12842, 12842, 
12842, 12842, 12842, 12842, 12842, 12842, 12842, 12842, 12842, 
12842, 12842), class = "Date"), amount = c(700, 900, 1000, 600, 
400, 1100, 600, 1100, 200, 800, 1000, 700, 300, 800, 800, 5123, 
400, 3401, 700, 500, 700, 3242, 500, 400, 5298, 900, 11832, 300, 
500, 600, 1100, 600, 300, 800, 400, 6774, 300, 200, 400, 14264, 
900, 13851, 17366, 1000, 800, 700, 6007, 500, 400, 6207, 900, 
12644, 800, 4276, 6434, 14779, 4507, 6446, 800, 17477, 1100, 
5009, 1000, 5718, 800, 13967, 6959, 15914, 200, 4470, 600, 800, 
10737, 700, 44749, 1000, 46552, 500, 13156, 1000, 23323, 1100, 
200, 300, 10792, 200, 400, 700, 200, 700, 1100, 1000, 700, 500, 
1100, 7268, 300, 200, 16125, 400, 14440, 700, 900, 300, 49752, 
200, 36518, 500, 900, 300, 900, 1000, 200, 19961, 21899, 12336, 
1100, 200, 700, 1100, 900, 1100, 800, 600, 400, 200, 500, 200, 
200, 38000, 16983, 1000, 300, 1000, 300, 800, 13.4, 42.7, 34700, 
12.6, 47.5, 13.3, 37.1, 17, 11.1, 15.5, 22.2, 55.8, 11.8, 50.1, 
45, 15.9, 38.8, 38.2, 20.1, 38.9, 7.1, 107.1, 48, 62.4, 2900, 
21.5, 19.1, 14, 19.5, 15.2, 5282, 94.7, 19.4, 28.2, 42.7, 110.2, 
0.8, 23.1, 20, 19.6, 2000, 5100, 1100, 200, 11900, 1100, 5500, 
7500, 1100, 800, 6000, 200, 600, 800, 25300, 45647, 1000, 700, 
600, 7000, 700, 900, 300, 2900, 5224, 30192, 24381, 400, 5123, 
23330, 700, 8500, 3191, 23041, 5029, 6238, 3401, 900, 20213, 
7618, 19935, 600, 5859, 3375, 12817, 500, 38645, 1600, 10600, 
5500, 700, 3217, 14626, 4550, 4356, 6689, 600, 3242, 9612, 5080, 
5039, 12785, 4212, 17632, 3395, 200, 3399, 5298, 14493, 28157, 
1800, 31348, 5544, 14100, 33045, 1800, 200, 800, 20066, 400, 
1000, 27666, 500, 600, 700, 700, 3151, 1000, 6774, 800, 1500, 
22452, 1100, 44333, 18347, 200, 600, 6242, 13900, 19746, 400, 
48098, 7041, 9100, 10584, 49590, 3021, 500, 14264, 5400, 13851, 
17366, 1200, 5072, 1100, 1100, 47831, 12015, 5200, 8905, 23524, 
6007, 1000, 300, 22349, 31038, 25200, 43737, 12154, 23736, 24863, 
400, 200, 6207, 29700, 14622, 4758, 5810, 12644, 17477, 19588, 
27078, 32594, 25609, 20281, 700, 900, 6310, 14319, 14400, 6434, 
14779, 4507, 6446, 4276, 9600, 13875, 12043, 4391, 4327, 9000, 
6698, 16392, 700, 15263, 1100, 18729, 5009, 3098, 4729, 5718, 
700, 500, 24400, 9658, 14963, 13967, 6959, 15914, 9800, 20567, 
3058, 600, 18497, 6148, 4470, 400, 10737, 15447, 24009, 44749, 
12138, 900, 800, 1000, 900, 4200, 700, 1100, 1300, 8000, 6000, 
29511, 200, 900, 600, 6600, 1100, 44162, 600, 24023, 900, 400, 
200, 300, 800, 33410, 14800, 400, 800, 500, 500, 19136, 33900, 
4100, 10500, 13400, 600, 700, 3700, 1000, 1000, 100, 3300, 800, 
9400, 45925, 41740, 500, 6200, 8000, 200, 3100, 500, 300, 31332, 
62100, 600, 7100, 28361, 4000, 200, 4500, 900, 900, 900, 1000, 
1500, 300, 2500, 2700, 11000, 300, 800, 900, 8900, 23990, 1100, 
1400, 800, 10700, 1800, 1100, 10900, 900, 200, 5200, 800, 200, 
800, 200, 900, 3900, 900, 600, 900, 18.7, 102, 2.9, 3, 39285, 
52.1, 34.1, 18.5, 21.3, 38.3, 160, 110.5, 58.6, 83.4, 34.7, 68.6, 
31, 20.3, 5.4, 89.3, 110.6, 61.5, 72.7, 13.7, 20.7, 25.9, 2.9, 
50.1, 14, 110, 16.2, 39, 73.8, 23.7, 249, 29.6, 117.3, 199)), row.names = c(NA, 
-500L), class = "data.frame")
Cole
  • 11,130
  • 1
  • 9
  • 24
rlock
  • 133
  • 9
  • 1
    I think your bottle neck is `tnet::closeness_w`. It looks very slow and if you need to do it per join, it will be slow. I'm also not sure how it works with your data as `to` and `from` supposed to be numeric rather character. – David Arenburg Sep 30 '20 at 08:30

2 Answers2

1

Here is a way to do it:

closeness_fnc <- function(DT, grp){
  cl <- tnet::closeness_w(DT[, 
                             sum(amount),
                             by = .(from, to)], 
                          directed = T,
                          gconly=FALSE, 
                          alpha = 0.5) 
  
  ind = DT[date == max(date), from]
  cl[ind,"n.closeness"]))
}

setDT(data)
data[, closeness_directed := closeness_fnc(data[(date >= end_date - 180) & (date <= end_date)]), by = .(end_date = date)]

##     end_date     id closeness_directed
##       <Date>  <int>              <num>
##1: 2006-08-31  83324         1.00000000
##2: 2006-09-09  87614         0.98744695
##3: 2006-09-13  88898         0.35329017
##4: 2006-09-18  89874         0.25176754
##5: 2006-10-07  94765         0.39233504
##6: 2006-10-31 100277         0.07167582
##7: 2006-10-31 101587         0.02390589

Note, the closeness_fnc() was just simplified to use only . The code could have similar adjustments. Then I mainly used your original approach which seemed to work well.

Edit Part of the problem is that we're aggregating each from and to for every date. We can instead do the aggregation all at once and create a lookup table.

closeness_fx3 = function(from, to, amount, date) {
  cl = closeness_w(data.frame(from, to, amount),
                   directed = TRUE,
                   gconly = FALSE,
                   alpha = 0.5)
  
  cl[from[date == max(date)], "n.closeness"]
}

setDT(data)
agg_data = data[, .(tot_amount = sum(amount)), by = .(date)]
agg_data[, end_date := date - 180]

data[, 
      closeness_directed := .SD[agg_data,
                                on = .(date >= end_date,
                                       date <= date),
                                closeness_fx3(from, to, amount, x.date),
                                by = .EACHI]$V1
]
data

Using bench::mark which includes checking results for equality, this method is slightly faster than other solutions.

# A tibble: 3 x 13
  expression        min median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time
  <bch:expr>      <bch> <bch:>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm>
1 simple_refactor 414ms  592ms      1.81     188MB     2.89    10    16      5.54s
2 ekoam           337ms  356ms      2.39     183MB     3.59    10    15      4.18s
3 use_agg         299ms  314ms      2.77     178MB     3.88    10    14       3.6s
Cole
  • 11,130
  • 1
  • 9
  • 24
  • Thank you, but `data[, closeness_fnc(data[(date >= end_date - 180) & (date <= end_date)]), by = .(end_date = date)]` is the part that slows the overall performance. That's why I wanted to use `data[, ':=' (closeness_directed = data[data, closeness_fnc(data), on = .(from, date <= date, date >= date_minus_180), by = .EACHI]$closeness_directed )] %>% select(-date_minus_180)` approach. – rlock Sep 27 '20 at 19:49
  • I see. The alternative approach does not work, though. You need more of a cartesian product. I will think about this and if I do not come up with something, I'll delete. Thanks. – Cole Sep 27 '20 at 20:49
  • Do you have a better dataset? One that may be more representative? This one is difficult because it is so small that I cannot really test speedups. – Cole Sep 28 '20 at 00:07
  • 1
    I could only add a dataset with 500 rows because of the limitation. Hopefully, that's also enough @Cole However, it will run fast on such data – rlock Sep 28 '20 at 00:18
  • Ok, see edit. Note, my original method was pretty performant. although I did edit original to include ```:=```. – Cole Oct 03 '20 at 16:06
  • I don't know why but it gives this error: "Error in as.tnet(net, type = "weighted one-mode tnet") : There are duplicated entries in the edgelist" – rlock Oct 03 '20 at 17:33
  • On example data or real data? – Cole Oct 03 '20 at 17:36
  • On the real data. – rlock Oct 03 '20 at 17:47
1

Consider re-writing your closeness_func as follows:

closeness_info <- function(from, DT) {
  DT <- DT[, .(weights = sum(amount)), by = .(from, to)]
  res <- closeness_w(DT, TRUE, FALSE, alpha = 0.5)
  `names<-`(res[, "n.closeness"], row.names(res))[from]
}

Then you can use the following data.table operation to achieve your goal:

set_closeness <- function(DT) {
  DT[, closeness_directed := closeness_info(.SD$from, DT[between(date, .BY$date - 180, .BY$date), ]), by = date]
}

Now let's benchmark the performances of this implementation against your original one with the long dataset you provide. Here I call it df. We first create two copies of that dataset. This is necessary as data.table by default uses pass-by-reference. If we do not create two copies of the dataset, then the following tests will always be applied to the same object (i.e. df).

data1 <- copy(df)
data2 <- copy(df)

We apply your original implementation to data1:

> system.time({
+   closeness_fnc <- function(data){
+     accounts <- data[date == max(date),from]
+     id <- data[date == max(date),id]  
+     
+     # for directed networks
+     df <- data %>% group_by(from, to) %>% mutate(weights = sum(amount)) %>% select(from, to, weights) %>% distinct
+     cl <- closeness_w(df, directed = T, gconly=FALSE, alpha = 0.5) 
+     
+     list(
+       id = id,
+       closeness_directed = cl[,"n.closeness"][accounts]
+     )
+     
+   }
+   network_data <- data1[, closeness_fnc(data1[(date >= end_date - 180) & (date <= end_date)]), .(end_date = date)] %>% select(-end_date)
+   data1 <- merge(x = data1, y = network_data, by = "id")
+ })
   user  system elapsed 
   1.19    0.07    1.26

Then we apply the new implementation above to data2:

> system.time({
+   closeness_info <- function(from, DT) {
+     DT <- DT[, .(weights = sum(amount)), by = .(from, to)]
+     res <- closeness_w(DT, TRUE, FALSE, alpha = 0.5)
+     `names<-`(res[, "n.closeness"], row.names(res))[from]
+   }
+   set_closeness <- function(DT) {
+     DT[, closeness_directed := closeness_info(.SD$from, DT[between(date, .BY$date - 180, .BY$date), ]), by = date]
+   }
+   set_closeness(data2)
+ })
   user  system elapsed 
   0.33    0.07    0.40 

Do they produce the same set of results?

> all(data1[order(id), ] == data2[order(id), ])
[1] TRUE

Output:

> data2[order(id), ]
        id from   to       date amount closeness_directed
  1:     0 5695 7468 2005-01-04    700       0.0344016544
  2:     1 9379 6213 2005-01-08  11832       0.0492013976
  3:     2 8170 7517 2005-01-10   1000       0.0097043019
  4:     3 9845 6143 2005-01-12   4276       0.0142486370
  5:     4 9640 6254 2005-01-14    200       0.0022874217
 ---                                                     
496: 21994 6671 6033 2005-02-07  14100       0.0064464840
497: 21995 7800 8967 2005-02-07  33045       0.0098688428
498: 21996 9835 8105 2005-02-07   1800       0.0023032952
499: 21997 7008 6794 2005-02-08    200       0.0006955321
500: 21998 6074 9457 2005-02-08    600       0.0026025058

I think this might be the best performance you can get given that you do not want to rewrite the closeness_w function, which is probably the bottleneck here as per David's advice.

ekoam
  • 8,744
  • 1
  • 9
  • 22