1

I am struggling with cellwise-calculations in a complex data-set (see below for dput() example).

I need to apply the formula for standardized mean difference (M1-M2/sqrt(s1^2+s2^2) to multiple rows and columns (studies and tests). The M1 & M2 (means) values are in the pr_cognm_ columns and s1 and s2 (standard deviations) in the pr_cognsd_ columns, and they should be calculated dependent on the factors id & tx...3 (treatments).

So e.g. for the pr_cognm_VV2_CRT_error column the id 336 has two rows and, in this case!, the values in the treat1 row need to be subtracted from VGT. But sometimes it is the other way around, e.g. within id 162 dGT needs to be subtracted from treat1 (luckily, this logic will be the same for each specific comparison though). Then, the same thing needs to happen with the standard deviations (i.e. potentiate and addition). Lastly, M and S need to be divided. There are many tests (columns) to run the formula on (e.g.pr_cognm_BVMT_perc_retention, pr_cognm_VV2_CRT_error, etc.) and often NA, since the specific id did not have this test. The data is in the long format and, to make it more complicated, some id have three instead of two rows (where two rows need to be subtracted from one other in a specific direction e.g. task1).

My best idea was to

#make a dataset
a <- readxl::read_excel("C:/.../reprod.xlsx")
b<- a[!grepl("com", a$id),] #already omitted in example dataset
pr_cognm <- dplyr::select(b,contains("pr_cognm"))
pr_cognsd <- dplyr::select(b,contains("pr_cognsd"))

c <- cbind(b$tx...3, b$id ,pr_cognm, pr_cognsd)
c$`b$id` <- as.factor(c$`b$id`)

#turn var's into numerics and factors
#potentiate all standard deviations (s1^2 and s2^2)
c[,3:ncol(c)] <- sapply(c[,3:ncol(c)], as.numeric)
c[,grepl("pr_cognsd", colnames(c))] <- c[,grepl("pr_cognsd", colnames(c))]^2

#then reshape
require(reshape2)
c %>% 
  dcast(b$id ~ b$tx...3, value.var = c("pr_cognm_VV2_CRT_error"), fill = 0)

   b$id BF BL BT dGT H-TT  HFL LM-TT treat1  VGT
1    55  0  0  0   0    0 0.00  0.00   0.00 0.00
2   162  0  0  0   0    0 0.00  0.00   0.00 0.00
3   236  0  0  0   0    0 0.00  0.00   0.00 0.00
4   336  0  0  0   0    0 0.00  0.00   8.75 7.58
5   377  0  0  0   0    0 0.00  0.00   0.00 0.00
6   521  0  0  0   0    0 0.00  0.00   0.00 0.00
7   525  0  0  0   0    0 0.00  0.00   0.00 0.00
8   527  0  0  0   0    0 0.00  0.00   0.00 0.00
9   528  0  0  0   0    0 0.00  0.00   0.00 0.00
10  535  0  0  0   0    0 5.65  6.54   0.00 0.00
11  548  0  0  0   0    0 0.00  0.00   0.00 0.00
12  553  0  0  0   0    0 0.00  0.00   0.00 0.00

Now I could define the rules which variables should be added like c$sub <- c$treat1-c$VGT and c$sub <- c$HFL-c$LM-TT, do the addition with the SD's in a similar fashion and finally divide the two variables to find the SMD. BUT, this only works for one test. In this case value.var = c("pr_cognm_VV2_CRT_error"). I would like to get this matrix for every test I have in the dataset via e.g. loop since more than one value.var dont work:

require(reshape2)
c %>% 
   dcast(b$id ~ b$tx...3, value.var = c("pr_cognm_VV2_CRT_error", "pr_cognm_BNT_perc_retention"), fill = 0)

Error in .subset2(x, i, exact = exact) : subscript out of bounds
In addition: Warning message:
In if (!(value.var %in% names(data))) { :
  the condition has length > 1 and only the first element will be used

If there were a way to loop via

c %>% 
+   dcast(b$id ~ b$tx...3, value.var = c(names(c[,3:ncol(c)]), fill = 0)

then I could maybe rbind them and do the subtractions into a new variable as described above and after doing the same for the SD's I would finally be able to do the division to get the SMD.

I could not get any solutions to work.

Reprod example (truncated):

a <- structure(list(checked = c("Y", "Y", NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA), id = c("55", "55", "162", "162", "236", "236", "336", "336", 
"377", "377", "521", "521", "525", "525", "527", "527", "528", 
"528", "535", "535", "548", "548", "548", "553", "553"), tx...3 = c("task1", 
"VGT", "dGT", "task1", "BT", "H-TT", "task1", "VGT", "BT", "H-TT", 
"task1", "VGT", "HFL", "H-TT", "BF", "BT", "HFL", "task1", "HFL", 
"LM-TT", "HFL", "BL", "task1", "HFL", "task1"), nta = c(2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 
2, 2), id2 = c(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 
1, 2, 1, 2, 1, 2, 3, 1, 2), cross = c("N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N"), pre_post = c("N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N"), case_control = c("N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N"), expsy = c("Y", 
"Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", 
"Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y"), hosp = c("Out", 
"Out", "Out", "Out", "In", "In", "NA", "NA", "In", "In", "NR", 
"NR", "NR", "NR", "Mx", "Mx", "NR", "NR", "Mx", "Mx", "Out", 
"Out", "Out", "Out", "Out"), tx...11 = c("task1", "VGT", "dGT", 
"task1", "BT", "H-TT", "task1", "VGT", "BT", "H-TT", "task1", 
"VGT", "HFL", "H-TT", "BF", "BT", "HFL", "task1", "HFL", "LM-TT", 
"HFL", "BL", "task1", "HFL", "task1"), vt_p = c("17", "17", "24", 
"24", "21", "21", "NR", "NR", "NR", "NR", "NA", "NA", "17", "17", 
"24", "24", "17", "17", "17", "17", "17", "17", "17", "17", "17"
), n_se = c("12", "12", "20", "20", "6", "6", "10", "10", "NR", 
"NR", "20", "20", "10", "6", "8", "8", "15", "15", "10", "6", 
"15", "15", "15", "10", "10"), cogn_name_AMI_K = c(NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA), cogn_cite_AMI_K = c(NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), cogn_last_stim_AMI_K = c(NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), n_bcogn_AMI_K = c(NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA), n_pcogn_AMI_K = c(NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA), pr_cognm_AMI_K = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    pr_cognsd_AMI_K = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), po_cognm_n_AMI_K = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA), po_cognsd_n_AMI_K = c(NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA), cogn_name_BVMT_perc_retention = c(NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, "brief_visual_memory_test", "brief_visual_memory_test", 
    "brief_visual_memory_test", NA, NA), cogn_cite_BVMT_perc_retention = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, "benedict_1997", "benedict_1997", "benedict_1997", 
    NA, NA), cogn_last_stim_BVMT_perc_retention = c(NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, "NR", "NR", "NR", NA, NA), n_bcogn_BVMT_perc_retention = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, 19, 24, 18, NA, NA), n_pcogn_BVMT_perc_retention = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, 19, 24, 18, NA, NA), pr_cognm_BVMT_perc_retention = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, "91.6", "86.6", "90", NA, NA), pr_cognsd_BVMT_perc_retention = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, "17", "36", "13.3", NA, NA), po_cognm_n_BVMT_perc_retention = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, "86.7", "82.1", "71.7", NA, NA), po_cognsd_n_BVMT_perc_retention = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, "20", "39.3", "24.2", NA, NA), cogn_name_BNT_naming = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, "boston_naming_task_naming", 
    "boston_naming_task_naming", NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA), cogn_cite_BNT_naming = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, "kaplan_1983", "kaplan_1983", 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), cogn_last_stim_BNT_naming = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, 30, 30, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA), n_bcogn_BNT_naming = c(NA, NA, NA, NA, NA, 
    NA, NA, NA, 14, 14, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA), n_pcogn_BNT_naming = c(NA, NA, NA, NA, 
    NA, NA, NA, NA, 14, 14, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), pr_cognm_BNT_naming = c(NA, NA, 
    NA, NA, NA, NA, NA, NA, "19.64", "18.14", NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), pr_cognsd_BNT_naming = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, "9.15", "5.3", NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), po_cognm_n_BNT_naming = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, "20.21", "20.71", NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), po_cognsd_n_BNT_naming = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, "9.38", "6.34", NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), cogn_name_VV2_CRT_error = c(NA, 
    NA, NA, NA, NA, NA, "VV2_crt_error", "VV2_crt_error", NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, "VV2_crt_error", "VV2_crt_error", 
    NA, NA, NA, NA, NA), cogn_cite_VV2_CRT_error = c(NA, NA, 
    NA, NA, NA, NA, "robbins_1994", "robbins_1994", NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, "robbins_1994", "robbins_1994", 
    NA, NA, NA, NA, NA), cogn_last_stim_VV2_CRT_error = c(NA, 
    NA, NA, NA, NA, NA, "NR", "NR", NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, "NR", "NR", NA, NA, NA, NA, NA), n_bcogn_VV2_CRT_error = c(NA, 
    NA, NA, NA, NA, NA, 12, 12, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, 12, 12, NA, NA, NA, NA, NA), n_pcogn_VV2_CRT_error = c(NA, 
    NA, NA, NA, NA, NA, 12, 12, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, 12, 12, NA, NA, NA, NA, NA), pr_cognm_VV2_CRT_error = c(NA, 
    NA, NA, NA, NA, NA, "8.75", "7.58", NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, "5.65", "6.54", NA, NA, NA, NA, NA), pr_cognsd_VV2_CRT_error = c(NA, 
    NA, NA, NA, NA, NA, "1.13", "2.84", NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, "1.10", "1.89", NA, NA, NA, NA, NA), po_cognm_n_VV2_CRT_error = c(NA, 
    NA, NA, NA, NA, NA, "7.50", "5.33", NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, "7.50", "2.34", NA, NA, NA, NA, NA), po_cognsd_n_VV2_CRT_error = c(NA, 
    NA, NA, NA, NA, NA, "2.06", "2.42", NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, "2.06", "2", NA, NA, NA, NA, NA), cogn_name_VV2_CRT_latency = c(NA, 
    NA, NA, NA, NA, NA, "VV2_crt_latency", "VV2_crt_latency", 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA), cogn_cite_VV2_CRT_latency = c(NA, NA, NA, NA, NA, 
    NA, "robbins_1994", "robbins_1994", NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), cogn_last_stim_VV2_CRT_latency = c(NA, 
    NA, NA, NA, NA, NA, "NR", "NR", NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), n_bcogn_VV2_CRT_latency = c(NA, 
    NA, NA, NA, NA, NA, 12, 12, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA), n_pcogn_VV2_CRT_latency = c(NA, 
    NA, NA, NA, NA, NA, 12, 12, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA), pr_cognm_VV2_CRT_latency = c(NA, 
    NA, NA, NA, NA, NA, "476.05", "465.65", NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), pr_cognsd_VV2_CRT_latency = c(NA, 
    NA, NA, NA, NA, NA, "35.86", "37.54", NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), po_cognm_n_VV2_CRT_latency = c(NA, 
    NA, NA, NA, NA, NA, "460.66", "433.13", NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), po_cognsd_n_VV2_CRT_latency = c(NA, 
    NA, NA, NA, NA, NA, "34.75", "46.70", NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, 
-25L), class = c("tbl_df", "tbl", "data.frame"))
shizzle
  • 89
  • 11

0 Answers0