How do I add the select() function to pick the columns I want from the output of glance()?

Question

How do I add `select()` after `glance()` to extract the `r.squared` value?

Please provide a reproducible dataset. One way of producing this is with the `dput` command, which you can learn about here: https://youtu.be/3EID3P1oisg — Shawn Hemelstrand, Feb 28 '22 at 02:04

score 0 · Answer 1 · answered Feb 28 '22 at 02:46

There are several methods you can use; here is an example:

library(tidyverse)
library(broom)
#> Warning: package 'broom' was built under R version 4.1.2

# Download the adult data set (CSV hosted on GitHub) and preview its columns.
data_adult <- read.csv(
  file = "https://raw.githubusercontent.com/guru99-edu/R-Programming/master/adult.csv"
)
glimpse(data_adult)
#> Rows: 48,842
#> Columns: 10
#> $ x               <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
#> $ age             <int> 25, 38, 28, 44, 18, 34, 29, 63, 24, 55, 65, 36, 26, 58…
#> $ workclass       <chr> "Private", "Private", "Local-gov", "Private", "?", "Pr…
#> $ education       <chr> "11th", "HS-grad", "Assoc-acdm", "Some-college", "Some…
#> $ educational.num <int> 7, 9, 12, 10, 10, 6, 9, 15, 10, 4, 9, 13, 9, 9, 9, 14,…
#> $ marital.status  <chr> "Never-married", "Married-civ-spouse", "Married-civ-sp…
#> $ race            <chr> "Black", "White", "White", "Black", "White", "White", …
#> $ gender          <chr> "Male", "Male", "Male", "Male", "Female", "Male", "Mal…
#> $ hours.per.week  <int> 40, 50, 40, 40, 30, 30, 40, 32, 40, 10, 40, 40, 39, 35…
#> $ income          <chr> "<=50K", "<=50K", ">50K", ">50K", "<=50K", "<=50K", "<…

# Build the modelling data set from the raw adult data:
#   * drop the row-index column `x`,
#   * standardise every numeric column (mean 0, sd 1),
#   * collapse `education` and `marital.status` into a few coarse factor levels,
#   * turn the "?" placeholder in `workclass` into NA,
#   * convert `income` to a factor.
data_adult_rescale <- data_adult %>%
  # Drop the index first so it is not standardised only to be thrown away.
  select(-x) %>%
  # scale() returns a one-column matrix; as.numeric() keeps the columns as
  # plain numeric vectors instead of matrix columns.
  mutate(across(where(is.numeric), ~ as.numeric(scale(.x)))) %>%
  mutate(
    education = factor(case_when(
      # Keep missing values missing rather than letting them fall through to
      # the catch-all level below.
      is.na(education) ~ NA_character_,
      education %in% c(
        "Preschool", "1st-4th", "5th-6th", "7th-8th",
        "9th", "10th", "11th", "12th"
      ) ~ "dropout",
      education == "HS-grad" ~ "HighGrad",
      education %in% c("Some-college", "Assoc-acdm", "Assoc-voc") ~ "Community",
      education == "Bachelors" ~ "Bachelors",
      education %in% c("Masters", "Prof-school") ~ "Master",
      # Every remaining value is labelled "PhD".
      TRUE ~ "PhD"
    )),
    marital.status = factor(case_when(
      is.na(marital.status) ~ NA_character_,
      marital.status %in% c("Never-married", "Married-spouse-absent") ~
        "Not_married",
      marital.status %in% c("Married-AF-spouse", "Married-civ-spouse") ~
        "Married",
      marital.status %in% c("Separated", "Divorced") ~ "Separated",
      # Every remaining value is labelled "Widow".
      TRUE ~ "Widow"
    )),
    # "?" is the placeholder this data set uses for an unknown workclass.
    workclass = na_if(workclass, "?"),
    income = factor(income)
  )

# Split a data frame into a leading "train" part and a trailing "test" part.
#
# The split is positional, not random: the first floor(size * nrow(data)) rows
# form the training set and all remaining rows form the test set.
#
# data:  data frame to split.
# size:  single number between 0 and 1; fraction of rows used for training.
# train: if TRUE return the training rows, otherwise return the test rows.
#
# Returns the selected rows of `data`.
create_train_test <- function(data, size = 0.8, train = TRUE) {
  stopifnot(is.numeric(size), length(size) == 1L, size >= 0, size <= 1)
  n_row <- nrow(data)
  n_train <- floor(size * n_row)
  # A logical mask stays correct when n_train is 0, where `1:0` would pick
  # row 1 and `data[-integer(0), ]` would drop every row.
  is_train <- seq_len(n_row) <= n_train
  if (train) {
    data[is_train, ]
  } else {
    data[!is_train, ]
  }
}

# Positional 80/20 split: leading rows for training, trailing rows for testing.
data_train <- create_train_test(
  data = data_adult_rescale,
  size = 0.8,
  train = TRUE
)
data_test <- create_train_test(
  data = data_adult_rescale,
  size = 0.8,
  train = FALSE
)

# Logistic regression of income on every other column of the training set.
logit <- glm(formula = income ~ ., family = "binomial", data = data_train)
summary(logit)
#> 
#> Call:
#> glm(formula = income ~ ., family = "binomial", data = data_train)
#> 
#> Deviance Residuals: 
#>     Min       1Q   Median       3Q      Max  
#> -2.6985  -0.5935  -0.2641  -0.0702   3.1709  
#> 
#> Coefficients:
#>                            Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)                0.023398   0.215881   0.108  0.91369    
#> age                        0.410256   0.018996  21.597  < 2e-16 ***
#> workclassLocal-gov        -0.624603   0.093467  -6.683 2.35e-11 ***
#> workclassNever-worked     -6.956721  72.524856  -0.096  0.92358    
#> workclassPrivate          -0.515633   0.078443  -6.573 4.92e-11 ***
#> workclassSelf-emp-inc     -0.064390   0.102062  -0.631  0.52811    
#> workclassSelf-emp-not-inc -1.095943   0.090596 -12.097  < 2e-16 ***
#> workclassState-gov        -0.811879   0.105447  -7.699 1.37e-14 ***
#> workclassWithout-pay      -1.077321   0.857160  -1.257  0.20881    
#> educationCommunity        -0.454462   0.081759  -5.559 2.72e-08 ***
#> educationdropout          -1.057308   0.210564  -5.021 5.13e-07 ***
#> educationHighGrad         -0.691164   0.116868  -5.914 3.34e-09 ***
#> educationMaster            0.348331   0.067133   5.189 2.12e-07 ***
#> educationPhD               0.426371   0.154295   2.763  0.00572 ** 
#> educational.num            0.570351   0.070057   8.141 3.91e-16 ***
#> marital.statusNot_married -2.503252   0.050547 -49.524  < 2e-16 ***
#> marital.statusSeparated   -2.141204   0.053698 -39.875  < 2e-16 ***
#> marital.statusWidow       -2.202177   0.123572 -17.821  < 2e-16 ***
#> raceAsian-Pac-Islander     0.103824   0.201573   0.515  0.60651    
#> raceBlack                  0.092367   0.191678   0.482  0.62989    
#> raceOther                 -0.008654   0.275873  -0.031  0.97498    
#> raceWhite                  0.370077   0.182859   2.024  0.04299 *  
#> genderMale                 0.122433   0.042441   2.885  0.00392 ** 
#> hours.per.week             0.374525   0.017029  21.993  < 2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 41133  on 36834  degrees of freedom
#> Residual deviance: 27593  on 36811  degrees of freedom
#>   (2238 observations deleted due to missingness)
#> AIC: 27641
#> 
#> Number of Fisher Scoring iterations: 10
# One-row tibble of model-level summary statistics for the fitted model.
glance(logit)
#> # A tibble: 1 × 8
#>   null.deviance df.null  logLik    AIC    BIC deviance df.residual  nobs
#>           <dbl>   <int>   <dbl>  <dbl>  <dbl>    <dbl>       <int> <int>
#> 1        41133.   36834 -13797. 27641. 27846.   27593.       36811 36835

# To get AIC as a tibble
glance(logit) %>% select(AIC)
#> # A tibble: 1 × 1
#>      AIC
#>    <dbl>
#> 1 27641.

# To get AIC as a vector
glance(logit)$AIC
#> [1] 27641.18

# Same, using [[ with the column name rather than its position, so the result
# does not depend on the order of the columns glance() returns.
glance(logit)[["AIC"]]
#> [1] 27641.18

^{Created on 2022-02-28 by the reprex package (v2.0.1)}

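In your case you would replace "AIC" with "r.squared". Note that this only works if `glance()` returns an `r.squared` column for your model (it does for `lm()` fits, for example); the `glm()` example above has no such column, as the `glance(logit)` output shows. Does that solve your problem?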

Not sure if this is relevant to the question, since it's unclear whether the data you used is similar to the structure of the data the OP didn't share, but why not use `%in%` instead of all these "OR" conditions? — camille, Mar 04 '22 at 17:56
Thanks for your comment @camille. I was going to vote to close this question but I thought perhaps an example might help 'prompt' OP for more details. I found this code (top google hit for 'glm r tutorial'; https://www.guru99.com/r-generalized-linear-model.html) and copied it verbatim except for the last three commands. I absolutely agree that using `%in%` (or `dplyr::case_when()`) makes sense, however I didn't want to spend too much time on this answer unless OP provided more details. — jared_mamrot, Mar 08 '22 at 01:54

How do I add the select() function to pick the columns I want from the output of glance()?

1 Answer