0

The question is given like this:

Read the file diabetes.csv. It contains two variables called BMI and Outcome. The variable Outcome takes only two values: 0 and 1. Conduct a non-parametric two-sample test of the hypothesis that the standard deviation of BMI is the same for both Outcome values.

# BMI column of the `diabetes` data frame; the bare name below prints it.
bmi <- diabetes$BMI
bmi
# Outcome column of `diabetes` (0/1 according to the problem statement);
# the bare name below prints it.
outcome <- diabetes$Outcome
outcome

# Number of BMI observations.
n <- length(bmi)

# tstat: the observed test statistic.
# NOTE: `???` is the asker's placeholder, not R code -- the statistic is the
# part being asked about, so the script cannot run until it is filled in.
tstat <- ???

# Describe the population and draw synthetic samples.
#
# Returns one draw from the permutation null distribution: the BMI values are
# shuffled across the two Outcome groups (group sizes held fixed) and the
# difference between the two groups' standard deviations is returned.
#
# Only BMI is permuted. Pooling `c(bmi, outcome)` would mix the 0/1 labels into
# the BMI values and split the pooled vector at length(bmi) instead of at the
# group boundary.
#
# Reads `bmi` and `outcome` from the enclosing environment.
# Returns sd(first group) - sd(second group), where the first group has as
# many observations as there are entries with outcome == 0. The matching
# observed statistic is sd(bmi[outcome == 0]) - sd(bmi[outcome == 1]).
f1 <- function() {
    stopifnot(length(bmi) == length(outcome))
    # Size of the outcome == 0 group; the remaining values form the other group.
    n0 <- sum(outcome == 0)
    x <- sample(bmi)
    in_first <- seq_along(x) <= n0
    m1 <- sd(x[in_first])
    m2 <- sd(x[!in_first])
    m1 - m2
}

# Create sampling distribution: 10000 draws of the statistic returned by f1().
sdist <- replicate(10000, f1())
plot(density(sdist))

# Gap: distance between the observed statistic `tstat` and the centre of the
# simulated distribution. The centre is computed once and reused; the bounds
# are written as centre -/+ gap rather than mean(sdist -/+ gap), which only
# gave the same value by linearity of the mean.
center <- mean(sdist)
gap <- abs(center - tstat)
abline(v = center + c(-1, 1) * gap, col = "dark orange")

# Simulated values lying farther from the centre than the observed statistic,
# on either side.
s1 <- sdist[sdist < center - gap | sdist > center + gap]

# Two-sided p-value: share of simulated values in those two tails.
pvalue <- length(s1) / length(sdist)
pvalue

The data are in a data frame called "diabetes". My question is: how do I compute the observed test statistic (`tstat` in the code above), given that Outcome is binary?

Imran Ali
  • 2,223
  • 2
  • 28
  • 41
SeanZ
  • 17
  • 5

1 Answers1

0

Use this code:

# Order the rows of `diabetes` by ascending Outcome, so that the rows with
# Outcome == 0 come before the rows with Outcome == 1.
diabetes <- diabetes[order(diabetes$Outcome), ]
View(diabetes)

# Number of rows with Outcome == 0 (size of the first group).
n <- sum(diabetes$Outcome == 0, na.rm = TRUE)

# Pooled BMI values; these are shuffled later to build the permutation samples.
g <- diabetes$BMI

# Total number of BMI values.
l <- length(g)

# One draw from the permutation distribution: shuffle the pooled BMI values
# (`g`), treat the first `n` shuffled values as one group and positions
# n + 1 through `l` as the other, and return the absolute difference between
# the two groups' standard deviations.
# Reads `g`, `n` and `l` from the enclosing environment.

f1 <- function() {
  shuffled <- sample(g)
  sd_first <- sd(shuffled[1:n])
  sd_rest <- sd(shuffled[(n + 1):l])
  abs(sd_first - sd_rest)
}

# Build the permutation distribution from 100000 independent calls to f1().

dist <- vapply(seq_len(100000), function(i) f1(), numeric(1))

# Estimate the density once, draw the curve, then fill the area under it.

dist_density <- density(dist)
plot(dist_density)
polygon(dist_density, col = "green")


# Rows of `diabetes` with Outcome == 0 and with Outcome == 1, respectively.
diabetes0 <- diabetes[diabetes$Outcome == 0, ]
diabetes1 <- diabetes[diabetes$Outcome == 1, ]

View(diabetes0)
View(diabetes1)

# Observed statistic: absolute difference between the standard deviation of
# BMI in the Outcome == 0 rows and in the Outcome == 1 rows.
sd_bmi0 <- sd(diabetes0$BMI)
sd_bmi1 <- sd(diabetes1$BMI)
tstat <- abs(sd_bmi0 - sd_bmi1)

tstat

# Mark the observed statistic on the current plot.
abline(v = tstat)

# Simulated values strictly greater than the observed statistic.
rside <- dist[dist > tstat]

# p-value: share of the simulated distribution lying above the observed value.
pvalue <- length(rside) / length(dist)
pvalue
ekhumoro
  • 115,249
  • 20
  • 229
  • 336
Prakriti
  • 26
  • 3