Sleep Study Data

sleep.study <- tibble(
  "hrs_no_sleep" =  c(8, 8, 12, 12, 16, 16, 20, 20, 24, 24),
  "num_errors" = c(8, 6,  6, 10, 14, 8, 14, 12, 12, 16)
)
ggplot(data=sleep.study, aes(x=hrs_no_sleep, y=num_errors)) + 
  geom_point(color='#2980B9', size = 3)

mod <- lm(num_errors ~ hrs_no_sleep, data = sleep.study)
summary(mod)
## 
## Call:
## lm(formula = num_errors ~ hrs_no_sleep, data = sleep.study)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -2.70  -2.00   0.35   1.45   3.40 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    3.0000     2.1266   1.411  0.19602   
## hrs_no_sleep   0.4750     0.1253   3.791  0.00531 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.242 on 8 degrees of freedom
## Multiple R-squared:  0.6423, Adjusted R-squared:  0.5976 
## F-statistic: 14.37 on 1 and 8 DF,  p-value: 0.005308

Confidence Intervals for \(\beta_0, \beta_1, \sigma^2\)

# Get parameters from the model
n <- 10
x <- sleep.study$hrs_no_sleep
y <- sleep.study$num_errors

b1.hat <- (n*sum(x*y) - sum(x)*sum(y))/(n*sum(x^2) - (sum(x)^2)) 
b0.hat <- mean(y)-b1.hat*mean(x)

res <- y - b0.hat - b1.hat*x
s <- sqrt(sum(res^2)/(n-2))

#ci b0 
print(c(
  b0.hat - qt(.975,df=n-2)*s*sqrt(sum(x^2))/(sqrt(n)*sqrt(sum((x-mean(x))^2))),
  b0.hat + qt(.975,df=n-2)*s*sqrt(sum(x^2))/(sqrt(n)*sqrt(sum((x-mean(x))^2)))
))
## [1] -1.903988  7.903988
#ci b1 
print(c(
  b1.hat - qt(.975,df=n-2)*s/sqrt(sum((x-mean(x))^2)),
  b1.hat + qt(.975,df=n-2)*s/sqrt(sum((x-mean(x))^2))
  ))
## [1] 0.1860298 0.7639702
#ci sig
print(c(
  (n-2)*s^2/qchisq(.975,n-2),
  (n-2)*s^2/qchisq(.025,n-2)
))
## [1]  2.292617 18.442645

Hypothesis Tests for \(\beta_0, \beta_1, \sigma^2\)

Test statistics and p-values for the hypothesis \[ H_0: \beta_i = 0, \quad H_a: \beta_i \neq0\] Compare these values to the model output summary to verify. For \(\sigma^2\), we are testing \[ H_0: \sigma^2 = 2^2, \quad H_a: \sigma^2 > 4\]

#ht b0
ts <- b0.hat/(s*sqrt(sum(x^2))/(sqrt(n)*sqrt(sum((x-mean(x))^2))))
2*pt(ts,lower.tail=F,df=n-2)
## [1] 0.1960158
#ht b0
ts <- b1.hat/(s/sqrt(sum((x-mean(x))^2)))
2*pt(abs(ts),lower.tail=F,df=n-2)
## [1] 0.005307774
#ht sig>2
ts <- (n-2)*s^2/4
pchisq(ts,lower.tail=F,df=n-2)
## [1] 0.2615341

Compare this to the values given by t.test.

Confidence Interval for \(E(Y)\)

A 95% CI for the average number of errors for a subject deprived of sleep for 18 hours.

res <- y - b0.hat - b1.hat*x
s <- sqrt(sum(res^2)/(n-2))

yhat <- b0.hat + b1.hat*18
lower.ci <- yhat - qt(.975,df=n-2) * s * sqrt(1/n + (18-mean(x))^2/sum((x-mean(x))^2))
upper.ci <- yhat + qt(.975,df=n-2) * s * sqrt(1/n + (18-mean(x))^2/sum((x-mean(x))^2))

print(c(lower.ci, upper.ci))
## [1]  9.816179 13.283821

R will compute and plot confidence intervals for the average. Compare the upper and lower points of the band to the CI you computed above.

ggplot(data=sleep.study, aes(x=hrs_no_sleep, y=num_errors)) + 
  geom_point(color='#2980B9', size = 4) +
  geom_smooth(method=lm, color='#2C3E50')
## `geom_smooth()` using formula = 'y ~ x'