Tutorial1

LM

#read the lead pollution data from the course web site; the first three
#lines of the file are skipped and the next line supplies the column names
leadpol <- read.table(
  file = "http://www.stats.uwo.ca/faculty/aim/2017/3859/data/leadpol.txt",
  header = TRUE,
  skip = 3
)
#show the imported data frame
leadpol

##    lead traffic
## 1   227     8.3
## 2   312     8.3
## 3   362    12.1
## 4   521    12.1
## 5   640    17.0
## 6   539    17.0
## 7   728    17.0
## 8   945    24.3
## 9   738    24.3
## 10  759    24.3
## 11 1263    33.6

#fit the simple linear regression of lead on traffic
ans <- lm(formula = lead ~ traffic, data = leadpol)
#scatter plot of lead against traffic, with the fitted line overlaid in blue
plot(
  x = leadpol[["traffic"]],
  y = leadpol[["lead"]],
  xlab = "traffic",
  ylab = "lead"
)
abline(ans, col = "blue")

#extract the residuals of the fitted model and display them
e <- residuals(ans)
e

##          1          2          3          4          5          6 
##  -60.48439   24.51561  -62.98301   96.01699   37.71613  -63.28387 
##          7          8          9         10         11 
##  125.71613   78.57404 -128.42596 -107.42596   60.06426

#histogram of the residuals
hist(x = e)

#density-scaled histogram of 10000 standard normal draws, with a kernel
#density estimate of the same sample drawn on top in blue
set.seed(912)
rnorm10000 <- rnorm(n = 10000)
hist(rnorm10000, probability = TRUE)
lines(x = density(rnorm10000), col = "blue")

#simulate one error per observation: standard normal errors first, then
#errors from a t distribution with 5 degrees of freedom
e_norm <- rnorm(n = nrow(leadpol), mean = 0, sd = 1)
e_t <- rt(n = nrow(leadpol), df = 5)
#side-by-side boxplots of the two simulated error samples
boxplot(e_norm, e_t)

#normal Q-Q plot of the simulated normal errors, with a reference line
qqnorm(y = e_norm, main = "Normal Q-Q Plot")
qqline(y = e_norm)

#simulate responses from the fitted simple linear regression: fitted mean
#plus a fresh error, using the normal errors (e_norm) and then the t errors
#on 5 df (e_t). The observed residuals e are not part of a simulated
#response, so they must not be added to or subtracted from the fitted values.
#Each simulated response is plotted against the observed lead values with
#the identity line y = x for reference. Refitting lm() on nlead / tlead to
#compare with ans is not done here.
nlead <- fitted(ans) + e_norm
plot(leadpol$lead, nlead,
     xlab = "original lead", ylab = "lead with normal errors")
abline(0, 1, col = "red")

#same construction with the t-distributed errors; the axis label names the
#t errors so the two plots can be told apart
tlead <- fitted(ans) + e_t
plot(leadpol$lead, tlead,
     xlab = "original lead", ylab = "lead with t errors")
abline(0, 1, col = "blue")

#default diagnostic plots for the fitted lm object
plot(x = ans)

Tutorial1

Yiran Wang ywan2325@uwo.ca

2017-9-12

Rmarkdown html

LM