Here is my homework:
In a first step, describe the dataset and the variables it contains, using descriptive measures and figures. In a second step, compare $Y_{T=1}$ to $Y_{T=0}$, and use an appropriate figure to visualize the relationship between these variables. Finally, in a third step, use the data to argue whether or not it is likely that treatment is randomly assigned.
Here is my code:
library(tidyverse)
library(readr)
# Load the homework dataset once; the rest of the script reads from
# `X17856731`.
X17856731 <- read_csv("17856731.csv")

# Open the spreadsheet viewer only in an interactive session; View() needs a
# graphical front end and may fail when the script is run in batch mode.
if (interactive()) {
  View(X17856731)
}

# Second handle on the same data, kept under its original name. Reusing the
# tibble that is already in memory avoids parsing the CSV a second time.
df_X21751433 <- X17856731

# Quick overview of every variable (type, range, quartiles, missing values).
summary(X17856731)

# Means of X and Y. na.rm = TRUE matches the other descriptive statistics in
# this script, so a single missing value does not turn the result into NA.
mean(X17856731$X, na.rm = TRUE)
mean(X17856731$Y, na.rm = TRUE)
#' Most frequent value (statistical mode) of a vector
#'
#' @param x A vector.
#' @param na.rm If `TRUE`, missing values are dropped before counting.
#' @return The value of `x` that occurs most often. When several values are
#'   tied, the one that appears first in `x` is returned. An empty input is
#'   returned unchanged.
Mode <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  if (length(x) == 0L) {
    return(x)
  }
  distinct_values <- unique(x)
  # match() needs the lookup table as its second argument: it maps every
  # element of x to its position in distinct_values, and tabulate() then
  # counts how often each position occurs.
  counts <- tabulate(match(x, distinct_values))
  distinct_values[which.max(counts)]
}
# Descriptive statistics for X and Y; missing values are ignored throughout.

# Most frequent value (uses the Mode() helper defined in this script).
Mode(X17856731$X, na.rm = TRUE)
Mode(X17856731$Y, na.rm = TRUE)

# Central tendency.
median(X17856731$X, na.rm = TRUE)
median(X17856731$Y, na.rm = TRUE)

# Spread: variance and standard deviation.
var(X17856731$X, na.rm = TRUE)
var(X17856731$Y, na.rm = TRUE)
sd(X17856731$X, na.rm = TRUE)
sd(X17856731$Y, na.rm = TRUE)

# skewness() comes from the `moments` package. Install it only when it is
# missing, so re-running the script does not reinstall it every time.
if (!requireNamespace("moments", quietly = TRUE)) {
  install.packages("moments")
}
library(moments)
skewness(X17856731$X, na.rm = TRUE)
skewness(X17856731$Y, na.rm = TRUE)

# Interquartile range.
IQR(X17856731$X, na.rm = TRUE)
IQR(X17856731$Y, na.rm = TRUE)
# Split the data by the treatment indicator. Inside filter(), `T` resolves to
# the dataset's column of that name (assumes the CSV has a 0/1 column `T`, as
# the assignment describes - confirm against the data).
df_nottreated_y <- filter(X17856731, T == 0)
df_treated_y <- filter(X17856731, T == 1)

# Raw difference in mean outcomes: treated minus untreated.
mean(df_treated_y$Y, na.rm = TRUE) - mean(df_nottreated_y$Y, na.rm = TRUE)

# One row per treatment group. The previous version computed mean(Y) twice
# under two misleading column names; each statistic now appears once. The
# group means of X are included because similar X across groups is what one
# would expect if treatment were randomly assigned.
X17856731 %>%
  group_by(T) %>%
  summarize(
    n = n(),
    mean_y = mean(Y, na.rm = TRUE),
    sd_y = sd(Y, na.rm = TRUE),
    mean_x = mean(X, na.rm = TRUE)
  )

# Distribution of Y in each treatment group.
ggplot(X17856731, aes(x = factor(T), y = Y)) +
  geom_boxplot() +
  labs(x = "T", y = "Y")
# The Y column on its own.
select(X17856731, Y)

# First rows of Y for each treatment group. Rows are chosen with filter();
# select() only chooses columns, so writing `T = 0` inside select() does not
# restrict the rows to that group.
X17856731 %>%
  filter(T == 0) %>%
  select(Y, T) %>%
  head()

X17856731 %>%
  filter(T == 1) %>%
  select(Y, T) %>%
  head()
# Y together with a labelled version of the treatment indicator.
#
# The previous version could not run: `::` looks a name up inside a package,
# and X17856731 is a data frame, not a package. It also recoded a `treatment`
# column after select(Y) had already dropped every other column, and wrote
# the result over Y itself.
#
# NOTE(review): this assumes the 0/1 column `T` is the treatment variable
# that the labels "treated" / "not_treated" were meant for - confirm.
df_selects <- X17856731 %>%
  select(Y, T) %>%
  mutate(
    treatment = factor(
      T,
      levels = c(0, 1),
      labels = c("not_treated", "treated")
    )
  ) %>%
  na.omit()
df_selects
# Y values of the treated and the untreated group. data() only loads datasets
# that ship with a package; these are subsets of the homework data, so they
# are built from X17856731 instead.
# NOTE(review): assumes plain vectors of Y are what was wanted here - confirm.
treated_y <- X17856731 %>%
  filter(T == 1) %>%
  pull(Y)
treated_y

nottreated_y <- X17856731 %>%
  filter(T == 0) %>%
  pull(Y)
nottreated_y