Error plotting lines on a graph made in R

Question

Could you help me solve following issue:

I have two codes that were made to generate the same scatter plot. The first one works normally, generates the graph and the lines without any problems. It is a code that requires vector i to generate the mean and standard deviation(sd).

Code 2, on the other hand, does not require vector i, but the result is not the desired one regarding the construction of lines in relation to the mean and sd. In my opinion it was to work.

Could the problem be with the ylim?

I hope someone helps me with this! =)

Thank you so much!

First code

library(dplyr)
library(tidyr)
library(lubridate)


data <- structure(
  list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
       date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
       date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
                 "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
                 "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
                 "2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
       Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
               "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
               "Thursday","Friday","Friday","Saturday","Saturday"),
       DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
       D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
       DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
       DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
  class = "data.frame", row.names = c(NA, -21L))

graph <- function(dt, dta = data) {                                        
dim_data<-dim(data)

day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
                length = dim_data[1]
)) 

data_grouped <- data %>%
  mutate(across(starts_with("date"), as.Date)) %>%
  group_by(date2) %>%
  summarise(Id = first(Id),
            date1 = first(date1),
            Week = first(Week),
            DTPE = first(DTPE),
            D1 = sum(D1)) %>%
  select(Id,date1,date2,Week,DTPE,D1)

data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
                                    date2=format(date2,"%d/%m/%Y"))
data_grouped<-data.frame(data_grouped)
data_grouped %>% 
  mutate(DTPE = na_if(DTPE, ""))

DS=c("Thursday","Friday","Saturday") 

i<-2

df_OC<-subset(data_grouped, DTPE == "")

ds_CO<-subset(df_OC,df_OC$Week==DS[i])

mean<-mean(as.numeric(ds_CO[,"D1"]) )
sd<-sd(as.numeric(ds_CO[,"D1"]))
    

  
  dta %>%
    filter(date2 == ymd(dt)) %>%
    summarize(across(starts_with("DR"), sum)) %>%
    pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
    mutate(name = as.numeric(name)) %>%
    plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5, 
         cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
  abline(h=mean, col='blue') +
    abline(h=(mean + sd), col='green',lty=2) 
  abline(h=(mean - sd), col='orange',lty=2)
  
}  
graph("2021-04-09",data)

Second code

library(dplyr)
library(tidyr)
library(lubridate)


data <- structure(
  list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
       date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
       date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
                 "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
                 "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
                 "2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
       Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
               "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
               "Thursday","Friday","Friday","Saturday","Saturday"),
       DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
       D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
       DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
       DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
  class = "data.frame", row.names = c(NA, -21L))


graph <- function(dt, dta = data) {
  
dim_data<-dim(data)

day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
                length = dim_data[1]
)) 

data_grouped <- data %>%
  mutate(across(starts_with("date"), as.Date)) %>%
  group_by(date2) %>%
  summarise(Id = first(Id),
            date1 = first(date1),
            Week = first(Week),
            DTPE = first(DTPE),
            D1 = sum(D1)) %>%
  select(Id,date1,date2,Week,DTPE,D1)

data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
                                        date2=format(date2,"%d/%m/%Y"))
data_grouped<-data.frame(data_grouped)
data_grouped %>% 
  mutate(DTPE = na_if(DTPE, ""))

# get the week day
  
my_day <- weekdays(as.Date(dt))

df_OC<-subset(data_grouped, DTPE == "")

ds_CO<-subset(df_OC,df_OC$Week == my_day)

mean<-mean(as.numeric(ds_CO[,"D1"]) )
sd<-sd(as.numeric(ds_CO[,"D1"]))


dta %>%
  filter(date2 == ymd(dt)) %>%
  summarize(across(starts_with("DR"), sum)) %>%
  pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
  mutate(name = as.numeric(name)) %>%
  plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5, 
       cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
abline(h=mean, col='blue') +
  abline(h=(mean + sd), col='green',lty=2) 
abline(h=(mean - sd), col='orange',lty=2)

}  
graph("2021-04-09",data)

Marek Fiołka · Accepted Answer · 2021-08-28T18:39:30.100

You've messed up a lot with these data transformations. Below, however, I present my code that works according to your expectations.

The main problem here was my_day <- weekdays (as.Date (dt)), In my system I was getting "piątek" and you didn't have such a day in your data, right?

library(dplyr)
library(tidyr)
library(lubridate)


data <- structure(
  list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
       date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
       date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
                 "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
                 "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
                 "2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
       Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
               "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
               "Thursday","Friday","Friday","Saturday","Saturday"),
       DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
       D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
       DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
       DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
  class = "data.frame", row.names = c(NA, -21L))


graph <- function(dt, dta = data) {
  
  dim_data<-dim(data)
  
  day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
                  length = dim_data[1]
  )) 
  
  data_grouped <- data %>%
    mutate(across(starts_with("date"), as.Date)) %>%
    group_by(date2) %>%
    summarise(Id = first(Id),
              date1 = first(date1),
              Week = first(Week),
              DTPE = first(DTPE),
              D1 = sum(D1)) %>%
    select(Id,date1,date2,Week,DTPE,D1)
  
  #data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
  #                                        date2=format(date2,"%d/%m/%Y"))
  #data_grouped<-data.frame(data_grouped)
  data_grouped %>% 
    mutate(DTPE = na_if(DTPE, ""))
  
  # get the week day
  
  #my_day <- weekdays(as.Date(dt))
  
  df_OC<-subset(data_grouped, DTPE == "")
  ds_CO = df_OC %>% filter(weekdays(date2) %in% weekdays(as.Date(dt)))
  #ds_CO<-subset(df_OC,df_OC$Week == my_day)
  
  mean<-mean(ds_CO$D1)
  sd<-sd(ds_CO$D1)
  
  
  dta %>%
    filter(date2 == ymd(dt)) %>%
    summarize(across(starts_with("DR"), sum)) %>%
    pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
    mutate(name = as.numeric(name)) %>%
    plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5, 
         cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
  abline(h=mean, col='blue') +
    abline(h=(mean + sd), col='green',lty=2) 
  abline(h=(mean - sd), col='orange',lty=2)
  
}  
graph("2021-04-09",data)

Finally, I recommend:

Keep your data in tibble,
do not unnecessarily transform the date into a string several times and vice versa,
use ggplo2. The charts will be much nicer.

Marek thank you so much for replying. Just two things: in the mean and standard deviation you put ds_CO, but you excluded this variable. I adjusted this, however it gave an error in the graph generation. Please, could you test again your code? — Antonio, Aug 28 '21 at 17:59

Error plotting lines on a graph made in R

1 Answers1