1

My data example

# Example data: 18 rows and 15 integer columns (a-n and xxx) coded 1/2.
# Several columns contain NA; d and j hold only the value 2 (plus one NA in d).
cross <- data.frame(
  a   = c(2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L),
  b   = c(1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L),
  c   = c(1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L),
  d   = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, NA, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
  e   = c(1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, NA, 2L, 2L, 2L, 2L, 1L, 2L, 1L),
  f   = c(2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L),
  g   = c(NA, 1L, 2L, NA, 2L, 2L, 1L, 2L, 1L, NA, NA, NA, NA, NA, 1L, NA, NA, NA),
  h   = c(2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
  i   = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L),
  j   = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
  k   = c(2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, NA, 1L, 2L, 1L, 1L, 1L, 1L, 1L),
  l   = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, NA, 2L, NA, 1L, 1L, 1L, 1L, 2L),
  m   = c(1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, NA, 2L, 2L, 2L, 1L),
  n   = c(1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, NA, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L),
  xxx = c(2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L)
)

I need to perform a cross-tabulation between all pairs of categorical variables, i.e.

a vs b
a vs c
...
d vs c
m vs n

and so on

I tried the table command, but it did not give the result I need, because I also need the chi-square test to be calculated. That is why the ideal result for me would look like this:

            b
        1   2   total
a   1   42  54  96
    2   40  35  75
total   82  89  171

and p-value for chi-square

Value(empirical)    df       p-value
chi-square  1,549   1   0,213

How can I cross-tabulate all combinations of variables at once and get similar output, namely the table and the chi-square statistics for it?

Grateful for any help.

quest
  • 35
  • 4
  • Does this answer your question? [performing a chi square test across multiple variables and extracting the relevant p value in R](https://stackoverflow.com/questions/26107287/performing-a-chi-square-test-across-multiple-variables-and-extracting-the-releva) – Limey Jun 27 '22 at 12:27

1 Answers1

5

To get all combinations of column names (there will be 105 combinations of 2 columns if you are starting with 15 columns) you can use the combn function:

# Every unordered pair of column names, stored as a 2-row character matrix
# with one pair per column.
cmb <- utils::combn(x = names(cross), m = 2)

You can use this to get a contingency table for each column pair using apply:

# Build one contingency table per column pair, returned as a list.
# lapply() is used rather than apply(): apply() collapses its results into a
# matrix whenever every table has the same number of cells (e.g. all 2 x 2),
# which would hand scalars instead of tables to any later per-table step.
# dnn labels each table's two dimensions with the crossed column names, so a
# printed table shows which variables it compares.
tabs <- lapply(seq_len(ncol(cmb)), function(i) {
  pair <- cmb[, i]
  table(cross[[pair[1]]], cross[[pair[2]]], dnn = pair)
})

This then allows you to calculate your p values using sapply. With the data you provided, Chi square tests throw an error due to the small numbers involved, which invalidate the assumptions of the test. We can use the Fisher Exact test instead in this situation, though if your own data is much larger you may wish to stick to Chi Square.

# Fisher exact test p-value for every 2 x 2 table; NA for any table with
# another shape (e.g. when one of the columns has a single observed level).
# vapply() guarantees a numeric vector even for empty input, and the result
# component is spelled out as `p.value` instead of relying on `$` partial
# matching.
pvals <- vapply(tabs, function(tab) {
  if (identical(dim(tab), c(2L, 2L))) {
    fisher.test(tab)$p.value
  } else {
    NA_real_
  }
}, numeric(1))

Finally, we can create a results data frame with the two columns compared, and the p value for the ratio of proportions between each two columns. Note that depending on your use case, your threshold for significance should be lowered to account for multiple hypothesis testing, as one would expect to have around 5 'significant' p values of < 0.05 purely by chance.

# One row per column pair: the two column names and that pair's p-value.
data.frame(
  col1 = cmb[1, ],
  col2 = cmb[2, ],
  pval = pvals
)
#>     col1 col2         pval
#> 1      a    b 1.0000000000
#> 2      a    c 1.0000000000
#> 3      a    d           NA
#> 4      a    e 1.0000000000
#> 5      a    f 0.2682072829
#> 6      a    g 1.0000000000
#> 7      a    h 0.5294117647
#> 8      a    i 0.6148459384
#> 9      a    j           NA
#> 10     a    k 0.6000323206
#> 11     a    l 0.6043956044
#> 12     a    m 0.5927601810
#> 13     a    n 1.0000000000
#> 14     a  xxx 1.0000000000
#> 15     b    c 0.0114379085
#> 16     b    d           NA
#> 17     b    e 1.0000000000
#> 18     b    f 0.3259803922
#> 19     b    g 1.0000000000
#> 20     b    h 0.4967320261
#> 21     b    i 1.0000000000
#> 22     b    j           NA
#> 23     b    k 0.2800581771
#> 24     b    l 0.2335164835
#> 25     b    m 0.6220691074
#> 26     b    n 1.0000000000
#> 27     b  xxx 1.0000000000
#> 28     c    d           NA
#> 29     c    e 1.0000000000
#> 30     c    f 1.0000000000
#> 31     c    g 0.4285714286
#> 32     c    h 1.0000000000
#> 33     c    i 1.0000000000
#> 34     c    j           NA
#> 35     c    k 0.5378151261
#> 36     c    l 0.5164835165
#> 37     c    m 0.1029411765
#> 38     c    n 0.2605042017
#> 39     c  xxx 0.2450980392
#> 40     d    e           NA
#> 41     d    f           NA
#> 42     d    g           NA
#> 43     d    h           NA
#> 44     d    i           NA
#> 45     d    j           NA
#> 46     d    k           NA
#> 47     d    l           NA
#> 48     d    m           NA
#> 49     d    n           NA
#> 50     d  xxx           NA
#> 51     e    f 0.2800581771
#> 52     e    g 0.4285714286
#> 53     e    h 0.5147058824
#> 54     e    i 0.5840336134
#> 55     e    j           NA
#> 56     e    k 1.0000000000
#> 57     e    l 0.6043956044
#> 58     e    m 0.0076173826
#> 59     e    n 0.0631868132
#> 60     e  xxx 0.0276341306
#> 61     f    g 0.4285714286
#> 62     f    h 1.0000000000
#> 63     f    i 0.5827497666
#> 64     f    j           NA
#> 65     f    k 1.0000000000
#> 66     f    l 1.0000000000
#> 67     f    m 1.0000000000
#> 68     f    n 0.6000323206
#> 69     f  xxx 1.0000000000
#> 70     g    h 1.0000000000
#> 71     g    i 1.0000000000
#> 72     g    j           NA
#> 73     g    k 1.0000000000
#> 74     g    l 1.0000000000
#> 75     g    m 0.4285714286
#> 76     g    n 1.0000000000
#> 77     g  xxx 1.0000000000
#> 78     h    i 0.4901960784
#> 79     h    j           NA
#> 80     h    k 1.0000000000
#> 81     h    l 0.4500000000
#> 82     h    m 0.4852941176
#> 83     h    n 1.0000000000
#> 84     h  xxx 0.5294117647
#> 85     i    j           NA
#> 86     i    k 0.2605042017
#> 87     i    l 0.2445054945
#> 88     i    m 1.0000000000
#> 89     i    n 0.6000323206
#> 90     i  xxx 1.0000000000
#> 91     j    k           NA
#> 92     j    l           NA
#> 93     j    m           NA
#> 94     j    n           NA
#> 95     j  xxx           NA
#> 96     k    l 0.5164835165
#> 97     k    m 1.0000000000
#> 98     k    n 0.5467032967
#> 99     k  xxx 0.6000323206
#> 100    l    m 0.2351648352
#> 101    l    n 0.1538461538
#> 102    l  xxx 0.0631868132
#> 103    m    n 0.0013736264
#> 104    m  xxx 0.0005656109
#> 105    n  xxx 0.0001616031

Created on 2022-06-27 by the reprex package (v2.0.1)

Allan Cameron
  • 147,086
  • 7
  • 49
  • 87