1

I can't get the average accuracy (the proportion of TRUE values in the Correct_answer column) for the groups Chart_type and Condition.

data

# Example data frame from the question (dput() output), bound to `data` so
# that the snippets referring to `data` can be run as-is.
# Correct_answer is stored as a factor with levels "FALSE"/"TRUE".
data <- structure(
  list(
    Element = structure(
      c(1L, 1L, 1L, 1L, 1L),
      .Label = c("1", "2", "3", "4", "5", "6"),
      class = "factor"
    ),
    Correct_answer = structure(
      c(2L, 2L, 2L, 1L, 2L),
      .Label = c("FALSE", "TRUE"),
      class = "factor"
    ),
    Response_time = c(25.155, 6.74, 28.649, 16.112, 105.5906238),
    Chart_type = structure(
      c(2L, 2L, 1L, 1L, 1L),
      .Label = c("Box", "Violin"),
      class = "factor"
    ),
    Condition = structure(
      c(1L, 2L, 1L, 2L, 1L),
      .Label = c("0", "1"),
      class = "factor"
    )
  ),
  row.names = c(NA, 5L),
  class = "data.frame"
)

Average by chart_type

# Groups by Chart_type, then applies mean() and sd() to every remaining
# column (no column selection is made before summarise_each).
# NOTE(review): Correct_answer is a factor in the example data, so mean()
# cannot average it -- this is where the NA reported below comes from.
av_data_chartType <- data %>% group_by(Chart_type) %>% summarise_each(funs(mean, sd))

Average by condition

# Groups by Condition, then applies mean() and sd() to every remaining
# column (no column selection is made before summarise_each).
# NOTE(review): Correct_answer is a factor in the example data, so mean()
# cannot average it -- this is where the NA reported below comes from.
# NOTE(review): the variable name looks like a typo for av_data_condition.
av_data_conition <- data %>% group_by(Condition) %>% summarise_each(funs(mean, sd))

No mean produced for accuracy

An NA value is placed where the accuracy should be.

Community
  • 1
  • 1
Zizi96
  • 459
  • 1
  • 6
  • 23
  • 1
    What do you mean by accuracy? The metric? Is this what you need? `with(df,table(Correct_answer==T))` – NelsonGon Jun 11 '19 at 12:51
  • @NelsonGon The column "Correct_answer" has logical vectors, if Correct_answer is TRUE then it means that participant got the question right. I want to see what proportion/percentage of participants responded correctly with respect to the groups I mentioned. – Zizi96 Jun 11 '19 at 12:56

3 Answers3

3

This should work:

# `a` is assumed to hold the question's data frame.
# Correct_answer is a factor with levels "FALSE"/"TRUE"; convert it to
# logical so that mean() returns the proportion of TRUE values.
a$Correct_answer <- as.logical(a$Correct_answer)

# Keep only the grouping column and Correct_answer so that mean() and sd()
# are not applied to the other columns.
av_data_chartType <- a %>%
  select(Chart_type, Correct_answer) %>%
  group_by(Chart_type) %>%
  summarise_each(funs(mean, sd))

# Stored under its own name so the per-condition summary does not overwrite
# the per-chart-type summary.
av_data_condition <- a %>%
  select(Condition, Correct_answer) %>%
  group_by(Condition) %>%
  summarise_each(funs(mean, sd))

You had 2 problems:

  1. Your Correct_answer was a factor.

  2. You tried to calculate your functions over every Column

Max Teflon
  • 1,760
  • 10
  • 16
3

You probably need

library(dplyr)

# Proportion of correct answers for each Chart_type x Condition combination.
# Correct_answer is a factor, so it is coerced to logical inside the summary;
# the mean of a logical vector is the share of TRUE values.
data %>%
  group_by(Chart_type, Condition) %>%
  summarise(avg = mean(as.logical(Correct_answer)))

Or if you need them separately

# Proportion of correct answers per chart type. Correct_answer is a factor,
# so it is coerced to logical before averaging.
data %>%
  group_by(Chart_type) %>%
  summarise(avg = mean(as.logical(Correct_answer)))

# Proportion of correct answers per condition.
data %>%
  group_by(Condition) %>%
  summarise(avg = mean(as.logical(Correct_answer)))
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
3

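Reproducing your code, I got a warning that led me to the answer: you shouldn't compute statistics on factor variables. If you know what you are doing, you can convert them to numeric. Note that as.numeric() on a factor returns the integer level codes (here 1 for "FALSE" and 2 for "TRUE"), so the Correct_answer means below are the proportion of correct answers plus 1 (e.g. 1.67 corresponds to an accuracy of 0.67):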

# Rebuild the question's example data frame from its dput() output.
# All columns except Response_time are factors.
data <- structure(list(Element = structure(c(1L, 1L, 1L, 1L, 1L), 
                                         .Label = c("1", "2", "3", "4", "5", "6"), 
                                         class = "factor"), 
                     Correct_answer = structure(c(2L, 2L, 2L, 1L, 2L), 
                                                .Label = c("FALSE", "TRUE"), 
                                                class = "factor"), 
                     Response_time = c(25.155, 6.74, 28.649, 16.112, 105.5906238
                     ), 
                     Chart_type = structure(c(2L, 2L, 1L, 1L, 1L), 
                                            .Label = c("Box", 
                                                       "Violin"), 
                                            class = "factor"), 
                     Condition = structure(c(1L, 2L, 1L, 2L, 1L), 
                                           .Label = c("0", "1"), 
                                           class = "factor")),
                row.names = c(NA, 5L), class = "data.frame")

library("dplyr", warn.conflicts = FALSE)
data <- data %>% as_tibble

# av_data_chartType 
# Convert the non-grouping factor columns to numeric, then compute mean and
# sd of every column within each Chart_type.
# NOTE(review): as.numeric() on a factor returns its integer level codes, so
# Correct_answer becomes 1 ("FALSE") / 2 ("TRUE"); its mean is therefore the
# proportion of TRUE plus 1, not the proportion itself.
data %>% 
        group_by(Chart_type) %>%
        mutate_if(.predicate = is.factor, .funs = as.numeric) %>% 
        summarise_each(list( ~mean, ~sd))
#> `mutate_if()` ignored the following grouping variables:
#> Column `Chart_type`
#> # A tibble: 2 x 9
#>   Chart_type Element_mean Correct_answer_~ Response_time_m~ Condition_mean
#>   <fct>             <dbl>            <dbl>            <dbl>          <dbl>
#> 1 Box                   1             1.67             50.1           1.33
#> 2 Violin                1             2                15.9           1.5 
#> # ... with 4 more variables: Element_sd <dbl>, Correct_answer_sd <dbl>,
#> #   Response_time_sd <dbl>, Condition_sd <dbl>

# av_data_condition
# Convert the non-grouping factor columns to numeric, then compute mean and
# sd of every column within each Condition.
# NOTE(review): as.numeric() on a factor returns its integer level codes, so
# Correct_answer becomes 1 ("FALSE") / 2 ("TRUE"); its mean is therefore the
# proportion of TRUE plus 1, not the proportion itself.
data %>% 
        group_by(Condition) %>%
        mutate_if(.predicate = is.factor, .funs = as.numeric) %>% 
        summarise_each(list( ~mean, ~sd))
#> `mutate_if()` ignored the following grouping variables:
#> Column `Condition`
#> # A tibble: 2 x 9
#>   Condition Element_mean Correct_answer_~ Response_time_m~ Chart_type_mean
#>   <fct>            <dbl>            <dbl>            <dbl>           <dbl>
#> 1 0                    1              2               53.1            1.33
#> 2 1                    1              1.5             11.4            1.5 
#> # ... with 4 more variables: Element_sd <dbl>, Correct_answer_sd <dbl>,
#> #   Response_time_sd <dbl>, Chart_type_sd <dbl>

Created on 2019-06-11 by the reprex package (v0.2.1)

cbo
  • 1,664
  • 1
  • 12
  • 27