This is the question where it was shown how to use the concept.
I have a naive question about the function that was given there, reproduced below, which assigns a predicted label to each cluster.
# Map cluster ids to the reference label each cluster overlaps with most.
#
# pred:   vector of cluster assignments (coerced to character).
# actual: vector of reference labels, same length as `pred` (coerced to
#         character).
#
# Returns a character vector with one element per entry of `pred`, holding
# the label assigned to that entry's cluster; the elements are named by the
# cluster id they were looked up with.
pred2labels <- function(pred, actual) {
  pred <- as.character(pred)
  actual <- as.character(actual)
  # Rows are clusters, columns are reference labels, cells are counts.
  tab <- as.matrix(table(pred, actual))
  # max.col() breaks ties at random by default, which makes the mapping
  # change between runs; "first" keeps the result reproducible.
  assignment <- colnames(tab)[max.col(tab, ties.method = "first")]
  names(assignment) <- rownames(tab)
  # Look up every observation's cluster id in the cluster -> label map.
  assignment[pred]
}
I tried to do the same. My question is: do I need to generate predicted labels for my cluster data?
Here is my data frame:
dput(bb)
structure(list(FAB = structure(c(4L, 2L, 5L, 3L, 4L, 5L, 4L,
4L, 5L, 3L, 4L, 2L, 4L, 3L, 2L, 3L, 5L, 5L, 4L, 3L, 2L, 5L, 3L,
5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 2L, 3L, 5L, 3L, 5L, 3L,
2L, 1L, 6L, 6L, 5L, 5L, 5L, 6L, 6L, 3L, 7L, 3L, 5L, 6L, 2L, 5L,
2L, 3L, 3L, 2L, 6L, 2L, 2L, 2L, 2L, 1L, 6L, 2L, 5L, 2L, 2L, 9L,
5L, 1L, 5L, 2L, 5L, 5L, 6L, 2L, 3L, 6L, 5L, 2L, 1L, 8L, 3L, 5L,
3L, 6L, 1L, 2L, 2L, 5L, 3L, 5L, 6L, 5L, 5L, 3L, 5L, 3L, 2L, 3L,
3L, 2L, 6L, 1L, 2L, 3L, 6L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L,
2L, 5L, 2L, 3L, 2L, 3L, 5L, 1L, 3L, 1L, 6L, 5L, 5L, 3L, 5L, 3L,
2L, 1L, 2L, 5L, 7L, 8L, 6L, 2L, 8L, 3L, 3L, 1L, 2L, 2L, 2L, 1L,
3L, 6L, 5L, 3L, 1L, 2L, 3L, 2L, 1L, 3L, 5L, 2L, 9L, 2L, 1L, 1L,
2L, 6L, 6L), .Label = c("M0", "M1", "M2", "M3", "M4", "M5", "M6",
"M7", "nc"), class = "factor"), RISK_CYTO = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 2L,
4L, 2L, 4L, 2L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 2L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L), .Label = c("Good", "Intermediate",
"N.D.", "Poor"), class = "factor"), Class = c(1L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 2L, 4L, 1L, 4L,
1L, 4L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 4L, 5L,
3L, 2L, 2L, 4L, 3L, 6L, 2L, 2L, 6L, 2L, 2L, 2L, 3L, 6L, 5L, 2L,
2L, 3L, 6L, 2L, 4L, 5L, 6L, 2L, 3L, 3L, 4L, 5L, 3L, 5L, 4L, 2L,
4L, 3L, 4L, 2L, 3L, 4L, 4L, 5L, 2L, 2L, 5L, 5L, 2L, 4L, 4L, 6L,
6L, 4L, 2L, 3L, 5L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 2L,
4L, 3L, 5L, 4L, 6L, 2L, 5L, 4L, 3L, 4L, 5L, 4L, 2L, 4L, 6L, 4L,
1L, 4L, 5L, 6L, 1L, 4L, 4L, 5L, 4L, 2L, 3L, 5L, 3L, 5L, 2L, 2L,
4L, 2L, 1L, 4L, 3L, 5L, 5L, 6L, 2L, 2L, 3L, 6L, 1L, 5L, 5L, 5L,
5L, 3L, 3L, 6L, 5L, 4L, 6L, 3L, 5L, 5L, 5L, 5L, 5L, 2L, 1L, 5L,
6L, 5L, 5L, 6L, 2L, 2L)), row.names = c(NA, -170L), class = "data.frame")
My steps were these:
library(irr)
# Keep only the cluster id and the reference FAB label. Base subsetting is
# used so that no pipe operator has to be attached first.
clus_arrange <- bb[, c("Class", "FAB")]
names(clus_arrange)[1] <- "clus"
# Translate every cluster id into the FAB level that cluster overlaps most.
clus_arrange$predicted_label <- pred2labels(clus_arrange$clus, clus_arrange$FAB)
# kappam.light() takes one row per subject and one column per rater.
# NOTE(review): the original snippet passed `cluster_r` without ever defining
# it; it is assumed to be the FAB / predicted_label pair -- confirm.
cluster_r <- clus_arrange[, c("FAB", "predicted_label")]
kappam.light(cluster_r)
My output is this
Light's Kappa for m Raters
Subjects = 170
Raters = 2
Kappa = 0.266
z = 6.62
p-value = 3.58e-11
My question is: is the approach I followed from that answer the right way of doing it?
UPDATED ANSWER BASED ON THIS TUTORIAL
# Cross-tabulation of FAB against cluster id, printed for inspection only.
# It gets its own name so that base::table is not shadowed.
fab_by_clus <- table(clus_arrange$FAB, clus_arrange$clus)
fab_by_clus
# kappam.fleiss() expects raw ratings (one row per subject, one column per
# rater), not a contingency table: given the cross-tab it would treat each
# FAB level as a subject and each cluster as a rater. Both columns must use
# the same category labels, hence predicted_label rather than clus.
kappam.fleiss(clus_arrange[, c("FAB", "predicted_label")], detail = TRUE)
My question is: which one is methodologically and logically correct?