-2

My previous post was closed and I was redirected to another post, which is not actually useful for my purpose because that post only compares two columns.

My code

# Reproducible 20-row sample of the study data, stored as a tibble
# (class "tbl_df" / "tbl" / "data.frame").
# Columns: `paciente` (patient identifier), `sexo_s1`, `edad_s1`,
# `grupo_int_v00` (intervention group, factor levels "A" / "B"), followed by
# repeated measurements whose names end in a visit suffix (_v00, _v66, _v01).
# Not every measure has all three visits (there is no p17_total_v01 here).
# Every column carries a `label` attribute; the numeric columns also carry
# `format.spss` (presumably left over from an SPSS import -- confirm).
data_samp <- 
structure(list(paciente = structure(c(6036, 6299, 6324, 6457, 
6201, 6092, 6409, 6268, 6126, 6306, 6119, 6297, 6467, 6123, 6249, 
6304, 6496, 6210, 6306, 6197), label = "Paciente", format.spss = "F6.0"), 
    sexo_s1 = structure(c(2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 
    1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L), .Label = c("Hombre", 
    "Mujer"), label = "Sexo", class = "factor"), edad_s1 = structure(c(61, 
    60, 61, 61, 64, 67, 71, 60, 62, 62, 65, 74, 62, 61, 74, 69, 
    62, 69, 62, 62), label = "Edad", format.spss = "F3.0"), grupo_int_v00 = structure(c(1L, 
    1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 
    1L, 2L, 1L, 1L), .Label = c("A", "B"), label = "Grupo de intervención", class = "factor"), 
    peso1_v00 = structure(c(93.6, 78.8, 94.5, 99, 72.1, 86.2, 
    91.6, 88.2, 78.6, 82.5, 72, 90.6, 103.2, 117.2, 98, 90.9, 
    78.4, 83, 82.5, 98), label = "Peso: 1a determinación", format.spss = "F5.1"), 
    cintura1_v00 = structure(c(111.6, 105, 111, 107, 92.5, 102, 
    115.5, 103, 102, 106.5, 101.5, 111.5, 114, 134, 123, 107, 
    104, 114.5, 106.5, 132), label = "Cintura: 1a determinación", format.spss = "F5.1"), 
    tasis2_e_v00 = structure(c(125, 137, 137, 136, 124, 146, 
    162, 123, 131, 124, 115, 154, 122, 122, 158, 130, 152, 137, 
    124, 152), label = "TA: tensión arterial 2: sistólica", format.spss = "F4.0"), 
    tadias2_e_v00 = structure(c(81, 79, 83, 84, 62, 76, 95, 65, 
    65, 49, 72, 73, 71, 84, 65, 76, 69, 72, 49, 90), label = "TA: tensión arterial 2: diastólica", format.spss = "F4.0"), 
    p17_total_v00 = structure(c(4, 7, 8, 5, 5, 10, 8, 7, 9, 10, 
    10, 6, 14, 5, 6, 6, 9, 9, 10, 6), label = "Cuestionario de 17 puntos: Suma de puntuación de P17", format.spss = "F3.0"), 
    geaf_tot_v00 = structure(c(1090.91, 1398.6, 4895.1, 5734.27, 
    447.55, 1622.38, 3356.64, 923.08, 1678.32, 1976.69, 3184.15, 
    3251.75, 2293.71, 671.33, 7370.63, 335.66, 993.01, 466.2, 
    1976.69, 0), label = "AF: Gasto energético en actividad física total (MET•min/sem)", format.spss = "F8.2"), 
    peso1_v66 = structure(c(91.6, 78.7, 89.5, 91.5, 68.2, 82.4, 
    87, 89.5, 75.6, 79, 70.3, 84.5, 96.5, 118, 86.2, 88.7, 73.2, 
    80, 79, 90.5), label = "Peso: 1a determinación", format.spss = "F5.1"), 
    cintura1_v66 = structure(c(109, 105, 105.2, 99, 86, 95, 112, 
    104, 95, 103, 98, 107, NA, 135.5, 106.5, 105.8, 105, 108, 
    103, 122.5), label = "Cintura: 1a determinación", format.spss = "F5.1"), 
    tasis2_e_v66 = structure(c(137, 138, 129, 119, 136, 146, 
    144, 138, 131, 131, 111, 144, NA, 132, 172, 106, 142, 129, 
    131, 131), label = "TA: tensión arterial 2: sistólica", format.spss = "F4.0"), 
    tadias2_e_v66 = structure(c(65, 72, 76, 74, 71, 69, 63, 56, 
    69, 69, 77, 80, NA, 66, 92, 67, 71, 63, 69, 85), label = "TA: tensión arterial 2: diastólica", format.spss = "F4.0", display_width = 13L), 
    p17_total_v66 = structure(c(2, 14, 11, 10, 7, 15, 11, 8, 
    13, 13, 12, 12, 10, 10, 15, 7, 15, 12, 13, 11), label = "Cuestionario de 17 puntos: Suma total de P17", format.spss = "F3.0"), 
    geaf_tot_v66 = structure(c(223.78, 406.53, 2797.2, 4055.94, 
    1678.32, 1566.43, 1860.14, 1657.34, 2349.65, 4195.8, 4461.54, 
    4090.91, 1538.46, 938, 6153.85, 769.23, 3286.71, 2517.48, 
    4195.8, 139.86), label = "AF: Gasto energético en actividad física total (MET•min/sem)", format.spss = "F8.2"), 
    peso1_v01 = structure(c(87.2, 77.4, 88, 95, 66.5, 79.2, 86.1, 
    93, 73, 71.7, 71.5, 83, 97, 117.7, 83, 90.5, 76.5, 77.5, 
    71.7, 93), label = "Peso: 1a determinación", format.spss = "F5.1"), 
    cintura1_v01 = structure(c(106.5, 106, 104, 103.5, 85, 94, 
    112, NA, 94, 95, 98, 106, NA, 135, 105, 108, 105.5, 105, 
    95, 124.5), label = "Cintura: 1a determinación", format.spss = "F5.1"), 
    tasis2_e_v01 = structure(c(115, 136, 126, 131, 145, 150, 
    138, NA, 144, 119, 115, 143, NA, 115, 145, 128, 146, 129, 
    119, 139), label = "TA: tensión arterial 2: sistólica", format.spss = "F4.0"), 
    tadias2_e_v01 = structure(c(58, 67, 75, 80, 72, 79, 64, NA, 
    71, 47, 80, 69, NA, 65, 80, 75, 62, 60, 47, 88), label = "TA: tensión arterial 2: diastólica", format.spss = "F4.0"), 
    geaf_tot_v01 = structure(c(839.16, 499.77, 3804.2, 1230.77, 
    223.78, 2582.75, 4055.94, 1132.87, 2769.23, 1734.27, 2526.81, 
    3575.76, 839.16, 3169.23, 5616.78, 1048.95, 2284.38, 2307.69, 
    1734.27, 559.44), label = "AF: Gasto energético en actividad física total (MET•min/sem)", format.spss = "F8.2")), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame")) 

Identification variable: paciente. Sex: sexo_s1. Variable1_v00: variable 1 at baseline; Variable1_v66: variable 1 at 6 months; Variable1_v01: variable 1 at 12 months; Variable2_v00: variable 2 at baseline; Variable2_v66: variable 2 at 6 months; Variable2_v01: variable 2 at 12 months; and so on. My intention is to make within-subject comparisons: for each patient, compare each variable (variable1, ...) between the different time points (00, 66 and 01). E.g.:

Patient 1: variable1_v00 vs variable1_v66; variable1_v00 vs variable1_v01; variable1_v66 vs variable1_v01.

I have more than 20 variables to compare and more than 200 individuals. I have reshaped the data into long (longitudinal) format so that time is a factor, but I cannot run it with ANOVA (remember, the analysis must be pairwise!).

The suggested code does something like this (just a slice of the end):

do(tidy(t.test(.$Price_Online, 
                 .$Price_Offline,

But I have more than two variables. Of course I could do it manually, but automating this kind of repetition is the whole point of programming.

Thanks in advance

Edit 1:

My programmatic approach to reach this goal. It must run against the structure of the posted dataset (data_samp).

# Both groups of intervention: A and B

# Base names (without the visit suffix) of every outcome variable to analyse.
# The vector is bound to `rowvars` because the loop further down iterates over
# that name; without the assignment the values are evaluated and discarded.
rowvars <- c(
  "p17_total", "geaf_tot", "glucosa", "homa", "coltot", "peso1",
  "cintura1", "tadias2_e", "tasis2_e", "hba1c", "trigli", "col_rema",
  "ldl_calc", "hdl", "i_huglp1", "i_hugip", "i_hupai1", "i_huvisfatin",
  "i_huresistin", "i_huleptin", "i_huglucagon", "i_huinsulin",
  "i_hucpeptide", "i_hughrelin", "i_pcr", "i_ratiolg", "albumi"
)

# One suffix per reported column, in order: baseline, 6 months,
# 6-month change, 12 months, 12-month change. The two "change" entries
# reuse the suffix of the visit they refer to.
visit <- paste0("_v", c("00", "66", "66", "01", "01"))


i=1
tab <- NA
for(i in 1:length(rowvars)){
  #For each variable in 'rowvars', writes the corresponding to each visit. 
  #eg: "p17_total_v00","p17_total_v66","d_p17_total_v66","p17_total_v01","d_p17_total_v01"
  #eg: "i_hugip_v00","i_hugip_v66","d_hugip_v66","i_hugip_v01","d_hugip_v01"
  v <- paste0(rowvars[i], visit)
  v[c(3,5)] <- paste0("d_", v[c(3,5)])
  v <- sub("d_i_", "d_", v)

Now we compute some descriptive statistics, either for the whole dataset or split by a grouping variable.

#Computes "mean (n)" of each variable in 'v'. Both groups of int.
  MEANs <- sapply(dat[,v], mean, na.rm = TRUE)
  MEANs <- sprintf("%.2f", round(MEANs, digits = 2))
  SD <- sapply(dat[,v], sd, na.rm = TRUE)
  SD <- sprintf("%.2f", round(SD, digits = 2))


 #Computes "mean (n)" of each variable in 'v' considering both groups of intervention.
  MEANsI <- sapply(subset(dat, grupo_int_v00 == "A")[,v], mean, na.rm = TRUE)
  MEANsI <- sprintf("%.2f", round(MEANsI, digits = 2))
  MEANsC <- sapply(subset(dat, grupo_int_v00 == "B")[,v], mean, na.rm = TRUE)
  MEANsC <- sprintf("%.2f", round(MEANsC, digits = 2))
  SDI <- sapply(subset(dat, grupo_int_v00 == "A")[,v], sd, na.rm = TRUE)
  SDI <- sprintf("%.2f", round(SDI, digits = 2))
  SDC <- sapply(subset(dat, grupo_int_v00 == "B")[,v], sd, na.rm = TRUE)
  SDC <- sprintf("%.2f", round(SDC, digits = 2))

Here comes my problem. First, I can run the comparison without taking the intervention groups into account:

 #Computes paired T-test between pairs of v00, v66, v01
  P1 <- t.test(x=dat[,v[1]], y=dat[,v[2]], paired = TRUE)$p.value #v00 vs v66
  P2 <- t.test(x=dat[,v[1]], y=dat[,v[4]], paired = TRUE)$p.value #v00 vs v01
  P3 <- t.test(x=dat[,v[2]], y=dat[,v[4]], paired = TRUE)$p.value #v66 vs v01

Selecting group A or B (grupo_int_v00)

t.test(subset(dat, grupo_int_v00 == "A")[,v[1]], subset(dat, grupo_int_v00 == "A")[,v[2]], paired = T , na.rm = T)$p.value

# Error: Must subset columns with a valid subscript vector.
i Logical subscripts must match the size of the indexed input.
x Input has size 1 but subscript `yok` has size 206.

If I change paired to FALSE, it returns a value, but I need the test to be paired. Before subsetting by grupo_int_v00, I checked that the values within the loop agree, and that part works.

So this is what I tried as one approach

2nd approach

With the dplyr package, something less thorough. Problem: I need to combine and iterate over the 00, 66 and 01 time points.

dat %>% 
  select(paciente, matches("_v00|_v01|_v66")) %>% 
  pivot_longer(!c(grupo_int_v00, paciente)) %>% 
  separate(name, into=c("name", "time"), sep="_v") %>% 
  group_by(time) %>% mutate(time = factor(time)) 
  group_by(...) %>%

## and here I get errors 


1 -    group_by(paciente) %>% 
+     purrr::map2_dbl(where(is.numeric), ~ t.test(.x ~ time, paired = T))

2 -   do(tidy(t.test...

I am pretty sure that I tried something else along the way as well.

If anything else needs clarifying, I will gladly edit the question again.

  • I fear your approach is possibly fundamentally incorrect but you've not provided enough context to be sure. By focussing on a programming issue, I fear you may be overlooking a more fundamental principle. I've suggested migration, but I think the question may still be closed for lack of focus or detail. – Limey Jan 17 '22 at 09:22
  • I guess with the edit it must be more clear – Javier Hernando Jan 17 '22 at 13:07

1 Answers1

0

Finally, someone with more expertise in the field shed some light on this. It turns out the data is a tibble rather than a plain data.frame, and I was told that tibbles behave differently under base-R subsetting syntax.


# `dat` is a tibble, not a plain data.frame. On a tibble, `dat[, "col"]`
# returns a one-column tibble instead of dropping to a vector, so t.test()
# receives data frames rather than numeric vectors. With paired = TRUE this
# produces the "Logical subscripts must match the size of the indexed input"
# error shown in the question.
class(dat)
# Console output: "tbl_df"     "tbl"        "data.frame"

# Converting to a base data.frame restores the drop-to-vector behaviour of
# `[, j]` that the t.test() calls rely on.
dat <- as.data.frame(dat)


# Paired t-test of v[1] (v00) against v[2] (v66), restricted to the rows of
# intervention group "A". Both arguments use the same subset, so the
# observations stay paired row by row.
P1 <- t.test(
  subset(dat, grupo_int_v00 == "A")[, v[1]],
  subset(dat, grupo_int_v00 == "A")[, v[2]],
  paired = TRUE
)$p.value
# p-value reported by the author on the full dataset: 1.695603e-32