sleep.study <- tibble(
"hrs_no_sleep" = c(8, 8, 12, 12, 16, 16, 20, 20, 24, 24),
"num_errors" = c(8, 6, 6, 10, 14, 8, 14, 12, 12, 16)
)
ggplot(data=sleep.study, aes(x=hrs_no_sleep, y=num_errors)) +
geom_point(color='#2980B9', size = 3)
mod <- lm(num_errors ~ hrs_no_sleep, data = sleep.study)
summary(mod)
##
## Call:
## lm(formula = num_errors ~ hrs_no_sleep, data = sleep.study)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.70 -2.00 0.35 1.45 3.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0000 2.1266 1.411 0.19602
## hrs_no_sleep 0.4750 0.1253 3.791 0.00531 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.242 on 8 degrees of freedom
## Multiple R-squared: 0.6423, Adjusted R-squared: 0.5976
## F-statistic: 14.37 on 1 and 8 DF, p-value: 0.005308
# Get parameters from the model
n <- 10
x <- sleep.study$hrs_no_sleep
y <- sleep.study$num_errors
b1.hat <- (n*sum(x*y) - sum(x)*sum(y))/(n*sum(x^2) - (sum(x)^2))
b0.hat <- mean(y)-b1.hat*mean(x)
res <- y - b0.hat - b1.hat*x
s <- sqrt(sum(res^2)/(n-2))
#ci b0
print(c(
b0.hat - qt(.975,df=n-2)*s*sqrt(sum(x^2))/(sqrt(n)*sqrt(sum((x-mean(x))^2))),
b0.hat + qt(.975,df=n-2)*s*sqrt(sum(x^2))/(sqrt(n)*sqrt(sum((x-mean(x))^2)))
))
## [1] -1.903988 7.903988
#ci b1
print(c(
b1.hat - qt(.975,df=n-2)*s/sqrt(sum((x-mean(x))^2)),
b1.hat + qt(.975,df=n-2)*s/sqrt(sum((x-mean(x))^2))
))
## [1] 0.1860298 0.7639702
#ci sig
print(c(
(n-2)*s^2/qchisq(.975,n-2),
(n-2)*s^2/qchisq(.025,n-2)
))
## [1] 2.292617 18.442645
Test statistics and p-values for the hypothesis \[ H_0: \beta_i = 0, \quad H_a: \beta_i \neq0\] Compare these values to the model output summary to verify. For \(\sigma^2\), we are testing \[ H_0: \sigma^2 = 2^2, \quad H_a: \sigma^2 > 4\]
#ht b0
ts <- b0.hat/(s*sqrt(sum(x^2))/(sqrt(n)*sqrt(sum((x-mean(x))^2))))
2*pt(ts,lower.tail=F,df=n-2)
## [1] 0.1960158
#ht b0
ts <- b1.hat/(s/sqrt(sum((x-mean(x))^2)))
2*pt(abs(ts),lower.tail=F,df=n-2)
## [1] 0.005307774
#ht sig>2
ts <- (n-2)*s^2/4
pchisq(ts,lower.tail=F,df=n-2)
## [1] 0.2615341
Compare this to the values given by t.test
.
A 95% CI for the average number of errors for a subject deprived of sleep for 18 hours.
res <- y - b0.hat - b1.hat*x
s <- sqrt(sum(res^2)/(n-2))
yhat <- b0.hat + b1.hat*18
lower.ci <- yhat - qt(.975,df=n-2) * s * sqrt(1/n + (18-mean(x))^2/sum((x-mean(x))^2))
upper.ci <- yhat + qt(.975,df=n-2) * s * sqrt(1/n + (18-mean(x))^2/sum((x-mean(x))^2))
print(c(lower.ci, upper.ci))
## [1] 9.816179 13.283821
R will compute and plot confidence intervals for the average. Compare the upper and lower points of the band to the CI you computed above.
ggplot(data=sleep.study, aes(x=hrs_no_sleep, y=num_errors)) +
geom_point(color='#2980B9', size = 4) +
geom_smooth(method=lm, color='#2C3E50')
## `geom_smooth()` using formula = 'y ~ x'