Custom column names as arguments in the functions of stability R package

Question

I developed the stability R package which can be installed from CRAN.

install.packages("stability")

However, I have difficulty in making it to take custom column names as function arguments. Here is an example of add_anova function

library(stability)
data(ge_data)

YieldANOVA <-
  add_anova(
      .data = ge_data
    , .y    = Yield
    , .rep  = Rep
    , .gen  = Gen
    , .env  = Env
  )
YieldANOVA

The above code works fine. However, when I change the column names of the data.frame, it doesn't work as below:

df1 <- ge_data
names(df1) <- c("G", "Institute", "R", "Block", "E", "Y")

fm1 <-
  add_anova(
      .data = df1
    , .y    = Y
    , .rep  = Rep
    , .gen  = G
    , .env  = E
  )

Error in model.frame.default(formula = terms(.data$Y ~ .data$E + .data$Rep:.data$E +  : 
  invalid type (NULL) for variable '.data$Rep'

Similarly another function stab_reg

fm1Reg <-
  stab_reg(
      .data = df1
    , .y    = Y
    , .gen  = G
    , .env  = E
  )

Error in eval(predvars, data, env) : object 'Gen' not found

The codes of these functions can be accessed by

getAnywhere(add_anova.default)

function (.data, .y, .rep, .gen, .env) 
{
    Y <- enquo(.y)
    Rep <- enquo(.rep)
    G <- enquo(.gen)
    E <- enquo(.env)
    fm1 <- lm(formula = terms(.data$Y ~ .data$E + .data$Rep:.data$E + 
        .data$G + .data$G:.data$E, keep.order = TRUE), data = .data)
    fm1ANOVA <- anova(fm1)
    rownames(fm1ANOVA) <- c("Env", "Rep(Env)", "Gen", "Gen:Env", 
        "Residuals")
    fm1ANOVA[1, 4] <- fm1ANOVA[1, 3]/fm1ANOVA[2, 3]
    fm1ANOVA[2, 4] <- NA
    fm1ANOVA[1, 5] <- 1 - pf(as.numeric(fm1ANOVA[1, 4]), fm1ANOVA[1, 
        1], fm1ANOVA[2, 1])
    fm1ANOVA[2, 5] <- 1 - pf(as.numeric(fm1ANOVA[2, 4]), fm1ANOVA[2, 
        1], fm1ANOVA[5, 1])
    class(fm1ANOVA) <- c("anova", "data.frame")
    return(list(anova = fm1ANOVA))
}
<bytecode: 0xc327c28>
<environment: namespace:stability>

and

   getAnywhere(stab_reg.default)

function (.data, .y, .rep, .gen, .env) 
{
    Y <- enquo(.y)
    Rep <- enquo(.rep)
    G <- enquo(.gen)
    E <- enquo(.env)
    g <- length(levels(.data$G))
    e <- length(levels(.data$E))
    r <- length(levels(.data$Rep))
    g_means <- .data %>% dplyr::group_by(!!G) %>% dplyr::summarize(Mean = mean(!!Y))
    names(g_means) <- c("G", "Mean")
    DataNew <- .data %>% dplyr::group_by(!!G, !!E) %>% dplyr::summarize(GEMean = mean(!!Y)) %>% 
        dplyr::group_by(!!E) %>% dplyr::mutate(EnvMean = mean(GEMean))
    IndvReg <- lme4::lmList(GEMean ~ EnvMean | Gen, data = DataNew)
    IndvRegFit <- summary(IndvReg)
    StabIndvReg <- tibble::as_tibble(data.frame(g_means, Slope = coef(IndvRegFit)[, 
        , 2][, 1], LCI = confint(IndvReg)[, , 2][, 1], UCI = confint(IndvReg)[, 
        , 2][, 2], R.Sqr = IndvRegFit$r.squared, RMSE = IndvRegFit$sigma, 
        SSE = IndvRegFit$sigma^2 * IndvRegFit$df[, 2], Delta = IndvRegFit$sigma^2 * 
            IndvRegFit$df[, 2]/r))
    MeanSlopePlot <- ggplot(data = StabIndvReg, mapping = aes(x = Slope, 
        y = Mean)) + geom_point() + geom_text(aes(label = G), 
        size = 2.5, vjust = 1.25, colour = "black") + geom_vline(xintercept = 1, 
        linetype = "dotdash") + geom_hline(yintercept = mean(StabIndvReg$Mean), 
        linetype = "dotdash") + labs(x = "Slope", y = "Mean") + 
        scale_x_continuous(sec.axis = dup_axis(), labels = scales::comma) + 
        scale_y_continuous(sec.axis = dup_axis(), labels = scales::comma) + 
        theme_bw()
    return(list(StabIndvReg = StabIndvReg, MeanSlopePlot = MeanSlopePlot))
}
<bytecode: 0xe431010>
<environment: namespace:stability>

Note we prefer a technical style of writing here. We gently discourage greetings, hope-you-can-helps, thanks, advance thanks, notes of appreciation, regards, kind regards, signatures, please-can-you-helps, chatty material and abbreviated txtspk, pleading, how long you've been stuck, voting advice, meta commentary, etc. Just explain your problem, and show what you've tried, what you expected, and what actually happened. — halfer, Jul 09 '18 at 17:43

akrun · Accepted Answer · 2018-07-08T07:07:34.117

One of the problems in the data 'df1' is the column name is 'R' instead of "Rep" which was passed into the function. Second, the terms passed into the formula are quosures. we could change it to string with quo_names and then construct formula with paste

add_anova1 <- function (.data, .y, .rep, .gen, .env) {
    y1 <- quo_name(enquo(.y))
    r1 <- quo_name(enquo(.rep))
    g1 <- quo_name(enquo(.gen))
    e1 <- quo_name(enquo(.env))

    fm <- formula(paste0(y1, "~", paste(e1, paste(r1, e1, sep=":"), 
                  g1, paste(g1, e1, sep=":"), sep="+")))

    fm1 <- lm(terms(fm, keep.order = TRUE), data = .data)
    fm1ANOVA <- anova(fm1)
    rownames(fm1ANOVA) <- c("Env", "Rep(Env)", "Gen", "Gen:Env", 
        "Residuals")
    fm1ANOVA[1, 4] <- fm1ANOVA[1, 3]/fm1ANOVA[2, 3]
    fm1ANOVA[2, 4] <- NA
    fm1ANOVA[1, 5] <- 1 - pf(as.numeric(fm1ANOVA[1, 4]), fm1ANOVA[1, 
        1], fm1ANOVA[2, 1])
    fm1ANOVA[2, 5] <- 1 - pf(as.numeric(fm1ANOVA[2, 4]), fm1ANOVA[2, 
        1], fm1ANOVA[5, 1])
    class(fm1ANOVA) <- c("anova", "data.frame")
    return(list(anova = fm1ANOVA))

 }

YieldANOVA2 <- add_anova1(
      .data = df1
    , .y    = Y
    , .rep  = R
    , .gen  = G
    , .env  = E
  )

-checking with the output generated using 'ge_data' without changing the column names

all.equal(YieldANOVA, YieldANOVA2, check.attributes = FALSE)
#[1] TRUE

Similarly stab_reg could be changed

Thanks @akrun for useful answer. – MYaseen208 Jul 08 '18 at 07:35 — MYaseen208, Jul 08 '18 at 07:35

Custom column names as arguments in the functions of stability R package

1 Answers1