0

Could you help me solve following issue:

I have two codes that were made to generate the same scatter plot. The first one works normally, generates the graph and the lines without any problems. It is a code that requires vector i to generate the mean and standard deviation(sd).

Code 2, on the other hand, does not require vector i, but the result is not the desired one regarding the construction of lines in relation to the mean and sd. In my opinion it was to work.

Could the problem be with the ylim?

I hope someone helps me with this! =)

Thank you so much!

First code

library(dplyr)
library(tidyr)
library(lubridate)


data <- structure(
  list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
       date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
       date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
                 "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
                 "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
                 "2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
       Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
               "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
               "Thursday","Friday","Friday","Saturday","Saturday"),
       DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
       D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
       DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
       DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
  class = "data.frame", row.names = c(NA, -21L))

graph <- function(dt, dta = data) {                                        
dim_data<-dim(data)

day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
                length = dim_data[1]
)) 

data_grouped <- data %>%
  mutate(across(starts_with("date"), as.Date)) %>%
  group_by(date2) %>%
  summarise(Id = first(Id),
            date1 = first(date1),
            Week = first(Week),
            DTPE = first(DTPE),
            D1 = sum(D1)) %>%
  select(Id,date1,date2,Week,DTPE,D1)

data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
                                    date2=format(date2,"%d/%m/%Y"))
data_grouped<-data.frame(data_grouped)
data_grouped %>% 
  mutate(DTPE = na_if(DTPE, ""))

DS=c("Thursday","Friday","Saturday") 

i<-2

df_OC<-subset(data_grouped, DTPE == "")

ds_CO<-subset(df_OC,df_OC$Week==DS[i])

mean<-mean(as.numeric(ds_CO[,"D1"]) )
sd<-sd(as.numeric(ds_CO[,"D1"]))
    

  
  dta %>%
    filter(date2 == ymd(dt)) %>%
    summarize(across(starts_with("DR"), sum)) %>%
    pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
    mutate(name = as.numeric(name)) %>%
    plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5, 
         cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
  abline(h=mean, col='blue') +
    abline(h=(mean + sd), col='green',lty=2) 
  abline(h=(mean - sd), col='orange',lty=2)
  
}  
graph("2021-04-09",data)

enter image description here

Second code

library(dplyr)
library(tidyr)
library(lubridate)


data <- structure(
  list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
       date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
       date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
                 "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
                 "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
                 "2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
       Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
               "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
               "Thursday","Friday","Friday","Saturday","Saturday"),
       DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
       D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
       DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
       DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
  class = "data.frame", row.names = c(NA, -21L))


graph <- function(dt, dta = data) {
  
dim_data<-dim(data)

day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
                length = dim_data[1]
)) 

data_grouped <- data %>%
  mutate(across(starts_with("date"), as.Date)) %>%
  group_by(date2) %>%
  summarise(Id = first(Id),
            date1 = first(date1),
            Week = first(Week),
            DTPE = first(DTPE),
            D1 = sum(D1)) %>%
  select(Id,date1,date2,Week,DTPE,D1)

data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
                                        date2=format(date2,"%d/%m/%Y"))
data_grouped<-data.frame(data_grouped)
data_grouped %>% 
  mutate(DTPE = na_if(DTPE, ""))

# get the week day
  
my_day <- weekdays(as.Date(dt))

df_OC<-subset(data_grouped, DTPE == "")

ds_CO<-subset(df_OC,df_OC$Week == my_day)

mean<-mean(as.numeric(ds_CO[,"D1"]) )
sd<-sd(as.numeric(ds_CO[,"D1"]))


dta %>%
  filter(date2 == ymd(dt)) %>%
  summarize(across(starts_with("DR"), sum)) %>%
  pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
  mutate(name = as.numeric(name)) %>%
  plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5, 
       cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
abline(h=mean, col='blue') +
  abline(h=(mean + sd), col='green',lty=2) 
abline(h=(mean - sd), col='orange',lty=2)

}  
graph("2021-04-09",data)

enter image description here

Antonio
  • 1,091
  • 7
  • 24

1 Answers1

1

You've messed up a lot with these data transformations. Below, however, I present my code that works according to your expectations.

The main problem here was my_day <- weekdays (as.Date (dt)), In my system I was getting "piątek" and you didn't have such a day in your data, right?

library(dplyr)
library(tidyr)
library(lubridate)


data <- structure(
  list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
       date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
       date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
                 "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
                 "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
                 "2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
       Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
               "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
               "Thursday","Friday","Friday","Saturday","Saturday"),
       DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
       D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
       DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
       DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
  class = "data.frame", row.names = c(NA, -21L))


graph <- function(dt, dta = data) {
  
  dim_data<-dim(data)
  
  day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
                  length = dim_data[1]
  )) 
  
  data_grouped <- data %>%
    mutate(across(starts_with("date"), as.Date)) %>%
    group_by(date2) %>%
    summarise(Id = first(Id),
              date1 = first(date1),
              Week = first(Week),
              DTPE = first(DTPE),
              D1 = sum(D1)) %>%
    select(Id,date1,date2,Week,DTPE,D1)
  
  #data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
  #                                        date2=format(date2,"%d/%m/%Y"))
  #data_grouped<-data.frame(data_grouped)
  data_grouped %>% 
    mutate(DTPE = na_if(DTPE, ""))
  
  # get the week day
  
  #my_day <- weekdays(as.Date(dt))
  
  df_OC<-subset(data_grouped, DTPE == "")
  ds_CO = df_OC %>% filter(weekdays(date2) %in% weekdays(as.Date(dt)))
  #ds_CO<-subset(df_OC,df_OC$Week == my_day)
  
  mean<-mean(ds_CO$D1)
  sd<-sd(ds_CO$D1)
  
  
  dta %>%
    filter(date2 == ymd(dt)) %>%
    summarize(across(starts_with("DR"), sum)) %>%
    pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
    mutate(name = as.numeric(name)) %>%
    plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5, 
         cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
  abline(h=mean, col='blue') +
    abline(h=(mean + sd), col='green',lty=2) 
  abline(h=(mean - sd), col='orange',lty=2)
  
}  
graph("2021-04-09",data)

Finally, I recommend:

  1. Keep your data in tibble,
  2. do not unnecessarily transform the date into a string several times and vice versa,
  3. use ggplo2. The charts will be much nicer.
Marek Fiołka
  • 4,825
  • 1
  • 5
  • 20
  • Marek thank you so much for replying. Just two things: in the mean and standard deviation you put ds_CO, but you excluded this variable. I adjusted this, however it gave an error in the graph generation. Please, could you test again your code? – Antonio Aug 28 '21 at 17:59
  • Thanks Marek! Helped me a lot! – Antonio Aug 28 '21 at 19:07