# Create the document: File -> New File -> R Markdown -> HTML -> OK
# Install the R Markdown package: Packages -> Install -> "rmarkdown"
# Import the lead/traffic data set from the course web site.
# The first three lines of the remote file are skipped and the next line is
# used as the column header.
leadpol <- read.table(
  file = "http://www.stats.uwo.ca/faculty/aim/2017/3859/data/leadpol.txt",
  header = TRUE,
  skip = 3
)
# Show the imported data frame.
print(leadpol)
## lead traffic
## 1 227 8.3
## 2 312 8.3
## 3 362 12.1
## 4 521 12.1
## 5 640 17.0
## 6 539 17.0
## 7 728 17.0
## 8 945 24.3
## 9 738 24.3
## 10 759 24.3
## 11 1263 33.6
# Simple linear regression of lead on traffic.
ans <- lm(formula = lead ~ traffic, data = leadpol)
# Scatter plot of lead against traffic, with the fitted regression line
# overlaid in blue.
with(leadpol, plot(traffic, lead, xlab = "traffic", ylab = "lead"))
abline(ans, col = "blue")
# Residuals of the fitted model (observed lead minus fitted lead).
e <- residuals(ans)
print(e)
## 1 2 3 4 5 6
## -60.48439 24.51561 -62.98301 96.01699 37.71613 -63.28387
## 7 8 9 10 11
## 125.71613 78.57404 -128.42596 -107.42596 60.06426
# Histogram of the residuals.
hist(e)
# Histogram with a density curve, illustrated on 10,000 standard normal draws.
# The seed is fixed so the draws (and every later random draw in this script)
# are reproducible.
set.seed(912)
rnorm10000 <- rnorm(n = 10000, mean = 0, sd = 1)
# freq = FALSE puts the histogram on the density scale so the kernel density
# estimate can be drawn on top of it.
hist(rnorm10000, freq = FALSE)
lines(density(rnorm10000), col = "blue")
# Simulate one error per observation: standard normal errors, then errors from
# a t distribution with 5 degrees of freedom (drawn in that order).
e_norm <- rnorm(nrow(leadpol), mean = 0, sd = 1)
e_t <- rt(nrow(leadpol), df = 5)
# Side-by-side box plots: box 1 is the normal sample, box 2 the t sample.
boxplot(e_norm, e_t)
# Normal Q-Q plot of the simulated normal errors, with a reference line.
qqnorm(e_norm, main = "Normal Q-Q Plot")
qqline(e_norm)
# Simulate responses from the fitted regression line, once with normal errors
# and once with t-distributed errors on 5 df, and plot each simulated response
# against the observed lead (the line y = x is drawn for reference).
#
# A simulated response is "fitted value + simulated error". The fitted values
# already have the observed residuals removed (fitted = lead - e), so `e` must
# not be subtracted again: doing so yields lead - 2 * e + error.
#
# NOTE(review): e_norm and e_t have unit scale, while the observed residuals
# are on the order of 100; multiply by sigma(ans) if the simulated errors are
# meant to match the residual scale -- confirm intent.
# NOTE(review): the simulated responses are not refitted with lm() here; add
# lm(nlead ~ traffic, data = leadpol) etc. if a comparison of fits is wanted.
nlead <- fitted(ans) + e_norm
plot(leadpol$lead, nlead,
     xlab = "original lead", ylab = "lead with normal errors")
abline(0, 1, col = "red")
tlead <- fitted(ans) + e_t
plot(leadpol$lead, tlead,
     xlab = "original lead", ylab = "lead with t errors")
abline(0, 1, col = "blue")
# Standard lm diagnostic plots for the fitted model: residuals vs fitted,
# normal Q-Q, scale-location, and residuals vs leverage (the default panels).
plot(ans, which = c(1, 2, 3, 5))