0

I have the following data sets, and I want to compare the elements they contain for similarities, using a looping strategy that covers all possible combinations (e.g., "setA, setB, setC, setD"; "setA, setB, setC"; "setA, setB"; "setB, setC, setD"; "setC, setD"; "setB, setD"; etc.).

Data sets:

# Example data: four sets of animal names whose overlaps are to be compared.
setA <- c(
  "dog", "cat", "cow",
  "sheep", "dunkey"
)
setB <- c(
  "fox", "cat",
  "cow", "snake"
)
setC <- c(
  "dog", "cat", "cow", "sheep",
  "dunkey", "fox", "python"
)
setD <- c(
  "dog", "cat", "lion",
  "sheep", "elephant", "fox"
)

I am not sure how to code this in R; here is my attempt, which did not produce the expected results:

# Return the elements common to every vector passed in.
# With a single argument that argument is returned unchanged; with no
# arguments the result is NULL.
similar <- function(...) {
  sets <- list(...)
  if (length(sets) == 0L) {
    return(NULL)
  }
  # Fold left to right: keep narrowing the running result against each set.
  common <- sets[[1L]]
  for (k in seq_along(sets)[-1L]) {
    common <- intersect(common, sets[[k]])
  }
  common
}


# Collect the four sets in an unnamed list so they can be indexed by position.
allSets <- list(setA, setB, setC, setD)

# NOTE(review): this loop does not compute any intersections.
# - Every call hands similar() a single argument, so there is nothing for
#   intersect() to compare it against.
# - allSets[[i]] extracts one vector, whereas allSets[i-1], allSets[i-2] and
#   allSets[i-3] use single brackets and therefore return a sub-list; an index
#   of 0 yields an empty list and a negative index drops that element rather
#   than selecting it.
# - The return values are neither assigned nor printed, so they are discarded.
for(i in 1:length(allSets)){
   similar(allSets[[i]])
   similar(allSets[i-1])
   similar(allSets[i-2])
   similar(allSets[i-3])
}

Can anyone help, please?

Rob John
  • 277
  • 1
  • 3
  • 12

1 Answer

2

Here is a function from a previous post that returns all of the intersections:

## Collect the intersection of every combination of two or more sets.
## 'sets' is a named list of vectors; 'out' is the accumulator that is threaded
## through the recursive calls. The result is a named list whose names are the
## contributing set names joined by ".". Fewer than two sets returns 'out'
## unchanged (NULL on a top-level call).
intersects <- function(sets, out=NULL) {
    n_sets <- length(sets)
    if (n_sets < 2) return(out)                # nothing left to pair up
    if (missing(out)) out <- list()            # top-level call: start with an empty accumulator
    positions <- seq_len(n_sets)
    pairs <- combn(n_sets, 2)                  # each column holds one ascending pair of positions
    for (k in seq_len(ncol(pairs))) {
        pair <- pairs[, k]
        label <- paste(names(sets[pair]), collapse=".")
        out[[label]] <- intersect(sets[[pair[1]]], sets[[pair[2]]])
        # Only sets positioned after the pair are carried forward; since the
        # pair is ascending, this also excludes both members of the pair.
        later <- positions > pair[2]
        # Recurse with the fresh intersection standing in for the pair, so the
        # next level extends it with each of the remaining sets.
        out <- intersects(append(out[label], sets[later]), out=out)
    }
    out
}

## Gather setA..setD into one list, named after the variables
set_names <- paste0("set", LETTERS[1:4])
sets <- mget(set_names)

intersects(sets)
# $setA.setB
# [1] "cat" "cow"
# 
# $setA.setB.setC
# [1] "cat" "cow"
# 
# $setA.setB.setC.setD
# [1] "cat"
# 
# $setA.setB.setD
# [1] "cat"
# 
# $setC.setD
# [1] "dog"   "cat"   "sheep" "fox"  
# 
# $setA.setC
# [1] "dog"    "cat"    "cow"    "sheep"  "dunkey"
# 
# $setA.setC.setD
# [1] "dog"   "cat"   "sheep"
# 
# $setA.setD
# [1] "dog"   "cat"   "sheep"
# 
# $setB.setC
# [1] "fox" "cat" "cow"
# 
# $setB.setC.setD
# [1] "fox" "cat"
# 
# $setB.setD
# [1] "fox" "cat"
Community
  • 1
  • 1
Rorschach
  • 31,301
  • 5
  • 78
  • 129