community!
I'm trying to run FAMD on a morphology-based dataset with 25 qualitative (binary) variables, each recording the presence or absence of fluorescence on a body part, and six quantitative variables. In addition, I have a few supplementary variables such as sex, genus and depth.
First I ran the code for the FAMD on my data set after I had removed all missing values with na.omit():
# FAMD on the NA-free data set, keeping 5 dimensions and suppressing the
# default plots; columns 1, 2, 28 and 35 are passed as supplementary variables.
res.famd1 <- FAMD(
  fluo_famd1,
  ncp = 5,
  sup.var = c(1, 2, 28, 35),
  graph = FALSE
)
and retrieved a bunch of results like eigenvalues, scree plot etc. I then tried to plot my qualitative variables within the two dimensions like in this example: [Example][1]
This is the code I used:
# Extract the results for the qualitative-variable categories and show them.
quali.var1 <- get_famd_var(res.famd1, element = "quali.var")
print(quali.var1)

# Plot the qualitative-variable categories on the factor map.
fviz_famd_var(res.famd1, choice = "quali.var")
Instead of plotting the category labels, R plots decimal numbers that I can't explain. [Missing categories][2]
After this I tried running the FAMD on my data set with missing values using the code given in the package description:
# library() stops immediately if missMDA is not installed; require() would only
# warn and return FALSE, and the failure would surface later at imputeFAMD().
library(missMDA)

# Impute the missing values of the mixed data set using 3 dimensions.
res.impute <- imputeFAMD(fluo_famd2, ncp = 3)

# Run FAMD on the incomplete data, supplying the imputed disjunctive table.
# NOTE(review): the imputation above is run on all columns, while columns
# 1, 2 and 28 are declared supplementary here (and the earlier run also used
# column 35) -- confirm that tab.disj lines up with the active variables.
res.famd2 <- FAMD(
  fluo_famd2,
  tab.disj = res.impute$tab.disj,
  sup.var = c(1, 2, 28)
)
When trying to plot the categories now, they do appear in the plot but they are doubled and labelled with _0 and _1. [doubled categories][3]
My questions are: Can you identify an obvious mistake? Why would the categories be plotted twice in the graph? Does it have an impact on the overall analysis? Is FAMD suited for a data set like this?

[1]: https://i.stack.imgur.com/8UFlA.png
[2]: https://i.stack.imgur.com/qb3Cz.png
[3]: https://i.stack.imgur.com/O1Dff.png
Please find a subset of my data here:
# Subset of the data as an R object literal (presumably dput() output): a
# 15-row tibble with factor columns (genus, sex and the 0/1 fluorescence
# indicators) followed by numeric columns (depth and the *_fluo / sum_chel
# measurements). The expression is not assigned to a name here; assign its
# value (e.g. `my_data <- structure(...)`) to work with it.
structure(list(genus = structure(c(5L, 7L, 7L, 7L, 9L, 7L, 7L,
9L, 9L, 7L, 7L, 9L, 7L, 6L, 7L), .Label = c("Cryptochirus",
"Dacryomaia",
"Fizesereneia", "Fungicola", "Hapalocarcinus", "Hiroia",
"Lithoscaptus",
"Neotroglocarcinus", "Opecarcinus", "Pseudohapalocarcinus",
"Xynomaia"
), class = "factor"), sex = structure(c(1L, 1L, 1L, 2L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("f", "m"), class
=
"factor"),
frontal_dorsal = structure(c(1L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0", "1"), class =
"factor"),
frontal_ventral = structure(c(1L, 2L, 2L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0", "1"), class =
"factor"),
mesogastric = structure(c(1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 2L, 2L), .Label = c("0", "1"), class =
"factor"),
cardial = structure(c(1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L), .Label = c("0", "1"), class = "factor"),
branchial = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
1L, 1L, 2L, 1L, 2L, 2L), .Label = c("0", "1"), class = "factor"),
ps1 = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L,
1L, 2L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
ps2 = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
ps3 = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
1L, 2L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
ps4 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
ps6 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
telson = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class = "factor"),
eyes = structure(c(1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 1L, 2L), .Label = c("0", "1"), class = "factor"),
eyestalk = structure(c(1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 2L), .Label = c("0", "1"), class = "factor"),
antennules = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("0", "1"), class =
"factor"),
anntenullar_peduncle = structure(c(1L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L), .Label = c("0", "1"), class
=
"factor"),
depth = c(NA, 10.3, 16, 16.1, 14.3, 12.8, 10.8, 12.6, 10.2,
11, 11.9, 13.1, 10.7, 10.1, 12.3), carapace_fluo = c(NA,
NA, 0.0999104660846311, 0.459446596994549, 0.639459602769835,
0.0157309627508303, NA, 0.792912115871697, 0.385646421420439,
0.0934932558564838, 0.118926192063408, 0.334765757290687,
NA, 0.712954991372207, 0.816431146170724), ap_fluo = c(NA,
0, 0.153709650160554, NA, 0.526410945516736, 0,
0.0572985597508758,
NA, 0.0105633802816901, 0.284174213022855, 0.305258467023173,
0.402286503491138, NA, 0, 0.0679211592610398), prod_fluo = c(NA,
0, 0, NA, 0.528576376861794, 0, 0, 0.15260360009031, 0,
0.0252962625341841,
0.241194486983155, 0.0717077570655442, NA, 0.479219143576826,
0), pol_fluo = c(NA, 0, 0, NA, 0, 0, 0, 0.118164567879938,
0, 0, 1, 0, NA, 0.299160251924423, 0), dac_fluo = c(NA, 0,
0, NA, 0, 0, 0, 0.102848534648042, 0, 0, 0.309536216779573,
0, NA, 0.0654761904761905, 0), sum_chel = c(NA, 0, 0, NA,
0.345118733509235, 0, 0, 0.14349725008088, 0, 0.0155266470835082,
0.347599820547331, 0.0451661774453177, NA, 0.32612422524067,
0)), row.names = c(NA, -15L), class = c("tbl_df", "tbl",
"data.frame"))