3

I'm using the ggstatsplot package to create some graphs. However, I'm running into a problem with one variable: I keep getting the "discrete value supplied to a continuous scale" error and I can't figure out why.

I've tried as.numeric and creating new variables, converting from factor back to numeric and double, but nothing works.

I don't get the same error when I build the equivalent plot directly with ggplot2.

I wonder if anyone can help me figure out what the problem is?

Here's a complete reproducible example with the exact data I'm using.

# Example data (dput() output), reproduced exactly as used in the question.
# `df` is a tibble (class "tbl_df") with 101 rows and four columns:
#   condition  - factor with levels "0", "1", "2", "3"
#   size       - numeric, contains one NA
#   predict    - numeric, contains one NA
#   meaningful - numeric, contains one NA (the column that triggers the error)
df <- structure(list(condition = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("0", 
"1", "2", "3"), class = "factor"), size = c(3, 1, 1, 2, 5, 4, 
5, 3, 1, 1, 4, 3, 5, 4, 4, 4, 2, 2, 4, 5, 3, 3, 3, 3, 5, 1, 5, 
5, 5, 1, 3, 4, 2, 1, 2, 1, 1, 3, 3, 1, 2, 3, 1, 4, 5, 5, 1, 5, 
4, 5, 5, 1, 1, 4, 1, 2, 5, 1, 2, 2, 5, 3, 3, 4, 5, 3, 3, 3, 2, 
1, 2, 4, 1, 1, 4, 4, 1, 2, NA, 3, 1, 4, 4, 2, 3, 4, 4, 4, 3, 
5, 4, 2, 2, 5, 5, 5, 4, 1, 2, 5, 5), predict = c(4, 4, 1, 1, 
1, 4, 2, 4, 3, 2, 2, 3, 1, 1, 4, 3, 5, 2, 4, 2, 1, 5, 3, 3, 3, 
3, 4, 2, 1, 1, 5, 2, 5, 3, 3, 3, 1, 5, 2, 3, 5, 2, 2, 5, 3, 2, 
1, 4, 2, 2, 4, 4, 1, 4, 3, 3, 1, 1, 2, 3, 4, 4, 2, 5, 4, 3, 2, 
3, 4, 4, 5, 2, 2, 4, 2, 2, 5, 4, NA, 1, 2, 3, 3, 5, 5, 5, 5, 
1, 1, 1, 2, 1, 4, 2, 1, 2, 5, 3, 1, 4, 5), meaningful = c(6, 
5, 3, 3, 5, 4, 3, 2, 4, 6, 6, 4, 2, 2, 4, 5, 2, 5, 2, 4, 5, 1, 
2, 7, 5, 7, 6, 3, 4, 4, 3, 7, 2, 2, 2, 4, 3, 3, 1, 6, 7, 1, 5, 
1, 7, 4, 1, 2, 3, 4, 1, 1, 4, 1, 7, 3, 4, 7, 6, 6, 2, 5, 5, 6, 
4, 3, 5, 6, 4, 1, 1, 2, 1, 4, 7, 5, 4, 6, NA, 5, 5, 6, 7, 4, 
3, 7, 7, 5, 4, 3, 1, 5, 5, 1, 6, 1, 5, 2, 5, 2, 1)), row.names = c(NA, 
-101L), class = c("tbl_df", "tbl", "data.frame"))

# Code used to generate the plots.
# If needed, install the package first:
# install.packages("ggstatsplot")

library(ggstatsplot)

# Violin plot of `size` by `condition` with a parametric test (type = "p")
# and partial eta-squared as the effect size. This call works.
ggbetweenstats(
  data = df,
  x = condition,
  y = size,
  point.jitter.height = 0.2,
  plot.type = "violin",
  type = "p",
  effsize.type = "partial_eta",
  xlab = "Effect size condition",
  ylab = "Perceived Size",
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  bf.message = FALSE
)

# Violin plot of `predict` by `condition`, same settings as the `size` plot.
# This call works.
ggbetweenstats(
  data = df,
  x = condition,
  y = predict,
  point.jitter.height = 0.2,
  plot.type = "violin",
  type = "p",
  effsize.type = "partial_eta",
  xlab = "Effect size condition",
  ylab = "Prediction",
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  bf.message = FALSE
)

# This is the call that fails with
# "discrete value supplied to a continuous scale".
# The only difference from the working calls is the y variable, `meaningful`.
ggbetweenstats(
  data = df,
  x = condition,
  y = meaningful,
  point.jitter.height = 0.2,
  plot.type = "violin",
  type = "p",
  effsize.type = "partial_eta",
  xlab = "Effect size condition",
  ylab = "Meaningfulness",
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  bf.message = FALSE
)

# For comparison: the same variables plotted directly with ggplot2
# (violin + narrow boxplot + jittered points) do not raise the error.
library(ggplot2)
ggplot(df, aes(condition, meaningful)) +
  geom_violin(aes(fill = condition)) +
  geom_boxplot(width = 0.1, coef = 0) +
  geom_jitter(width = 0.1, alpha = 0.5) +
  theme_classic()

1 Answer

1

This was a bug in ggstatsplot. Any data frame with a variable name containing the pattern "mean" (like `meaningful` here) was affected by this issue.

It's fixed now.

# setup
# Fix the RNG seed so the run is reproducible; the output notes below report
# that the effect-size CI is computed from bootstrap samples.
set.seed(123)
library(ggstatsplot)

# data
# Same dput() data as in the question: a tibble (class "tbl_df") with 101 rows.
#   condition  - factor with levels "0", "1", "2", "3"
#   size       - numeric, contains one NA
#   predict    - numeric, contains one NA
#   meaningful - numeric, contains one NA (name contains the pattern "mean")
df <- structure(list(condition = structure(c(
  1L, 1L, 1L, 1L, 1L, 1L,
  1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
  2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
  2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
  3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
  3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
  4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L
), .Label = c(
  "0",
  "1", "2", "3"
), class = "factor"), size = c(
  3, 1, 1, 2, 5, 4,
  5, 3, 1, 1, 4, 3, 5, 4, 4, 4, 2, 2, 4, 5, 3, 3, 3, 3, 5, 1, 5,
  5, 5, 1, 3, 4, 2, 1, 2, 1, 1, 3, 3, 1, 2, 3, 1, 4, 5, 5, 1, 5,
  4, 5, 5, 1, 1, 4, 1, 2, 5, 1, 2, 2, 5, 3, 3, 4, 5, 3, 3, 3, 2,
  1, 2, 4, 1, 1, 4, 4, 1, 2, NA, 3, 1, 4, 4, 2, 3, 4, 4, 4, 3,
  5, 4, 2, 2, 5, 5, 5, 4, 1, 2, 5, 5
), predict = c(
  4, 4, 1, 1,
  1, 4, 2, 4, 3, 2, 2, 3, 1, 1, 4, 3, 5, 2, 4, 2, 1, 5, 3, 3, 3,
  3, 4, 2, 1, 1, 5, 2, 5, 3, 3, 3, 1, 5, 2, 3, 5, 2, 2, 5, 3, 2,
  1, 4, 2, 2, 4, 4, 1, 4, 3, 3, 1, 1, 2, 3, 4, 4, 2, 5, 4, 3, 2,
  3, 4, 4, 5, 2, 2, 4, 2, 2, 5, 4, NA, 1, 2, 3, 3, 5, 5, 5, 5,
  1, 1, 1, 2, 1, 4, 2, 1, 2, 5, 3, 1, 4, 5
), meaningful = c(
  6,
  5, 3, 3, 5, 4, 3, 2, 4, 6, 6, 4, 2, 2, 4, 5, 2, 5, 2, 4, 5, 1,
  2, 7, 5, 7, 6, 3, 4, 4, 3, 7, 2, 2, 2, 4, 3, 3, 1, 6, 7, 1, 5,
  1, 7, 4, 1, 2, 3, 4, 1, 1, 4, 1, 7, 3, 4, 7, 6, 6, 2, 5, 5, 6,
  4, 3, 5, 6, 4, 1, 1, 2, 1, 4, 7, 5, 4, 6, NA, 5, 5, 6, 7, 4,
  3, 7, 7, 5, 4, 3, 1, 5, 5, 1, 6, 1, 5, 2, 5, 2, 1
)), row.names = c(
  NA,
  -101L
), class = c("tbl_df", "tbl", "data.frame"))

# plot: `meaningful` by `condition`, all other arguments left at their defaults
ggbetweenstats(data = df, x = condition, y = meaningful)
#> Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
#> 
#> Note: Shapiro-Wilk Normality Test for meaningful: p-value = < 0.001
#> 
#> Note: Bartlett's test for homogeneity of variances for factor condition: p-value = 0.284
#> 

Created on 2019-11-22 by the reprex package (v0.3.0)

Indrajeet Patil
  • 4,673
  • 2
  • 20
  • 51