To read the stock prices (adjusted prices) data, we use the R package “quantmod”.
if (!require("quantmod")) {
install.packages("quantmod")
require("quantmod")
}
#If you do not specify the "to" option, then the end date of the data will be today.
getSymbols('CM',from="2000-01-01",to="2015-10-05")
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## Warning in download.file(paste(yahoo.URL, "s=", Symbols.name, "&a=",
## from.m, : downloaded length 251563 != reported length 200
## [1] "CM"
Z <- CM
str(Z)
## An 'xts' object on 2000-01-03/2015-10-05 containing:
## Data: num [1:3964, 1:6] 23.6 23 22.8 23.1 23.1 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:6] "CM.Open" "CM.High" "CM.Low" "CM.Close" ...
## Indexed by objects of class: [Date] TZ: UTC
## xts Attributes:
## List of 2
## $ src : chr "yahoo"
## $ updated: POSIXct[1:1], format: "2015-10-06 21:21:41"
dim(Z) #the length of the data
## [1] 3964 6
head(Z) # show the first 6 rows of data
## CM.Open CM.High CM.Low CM.Close CM.Volume CM.Adjusted
## 2000-01-03 23.625 23.6250 23.000 23.0000 21900 10.99155
## 2000-01-04 23.000 23.0625 22.625 23.0000 15000 10.99155
## 2000-01-05 22.750 23.4375 22.750 23.3750 11000 11.17076
## 2000-01-06 23.125 23.3125 23.000 23.1250 6200 11.05129
## 2000-01-07 23.125 23.1250 22.500 23.0000 13000 10.99155
## 2000-01-10 22.875 22.9375 22.625 22.6875 6700 10.84221
tail(Z) # show the last 6 rows of data
## CM.Open CM.High CM.Low CM.Close CM.Volume CM.Adjusted
## 2015-09-28 69.42 69.91 69.03 69.03 313800 69.03
## 2015-09-29 69.23 69.96 68.87 69.86 226300 69.86
## 2015-09-30 70.49 72.07 70.47 71.96 300700 71.96
## 2015-10-01 72.66 72.86 72.16 72.60 334400 72.60
## 2015-10-02 71.99 73.02 71.33 73.01 458300 73.01
## 2015-10-05 73.71 74.99 73.52 74.88 340600 74.88
Separate the
train <- Z[1:2000]
test <- Z[2001:3956]
You have several ways to visualize the data.
#From the xts package
plot(train[,6], main = "Adjusted price, CIBC stock")

#From the quantmod package
chartSeries(train[,6], theme = "white", name = "Adjusted price, CIBC stock")

#From the lattice package
library(lattice)
xyplot(train[,6], main = "Adjusted price, CIBC stock")

Next, we compute the log return for the stock price.
R <- diff( log(train[,6]) )
head(R)
## CM.Adjusted
## 2000-01-03 NA
## 2000-01-04 0.000000000
## 2000-01-05 0.016172850
## 2000-01-06 -0.010752755
## 2000-01-07 -0.005420095
## 2000-01-10 -0.013680034
R <- R[-1,]
plot(R, main = "Log returns, CIBC stock")

Let Y_t=|R_t| and consider the scatterplot of Y_t vsY_t-1
Ra <- abs(R)
Ra <- as.numeric(Ra)
plot( Ra[-length(Ra)], Ra[-1],
xlab = expression(Y[t-1]), ylab = expression(Y[t]),
main = paste(expression(Y[t]),"vs",expression(Y[t-1])),
pch=".", cex=4 )

If we change the figure scale to be log-log
summary(Ra)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000000 0.003729 0.007988 0.010840 0.014370 0.083300
#Two outliers
plot( Ra[-length(Ra)], Ra[-1], log = "xy",
xlab = expression(Y[t-1]), ylab = expression(Y[t]),
main = paste(expression(Y[t]),"vs",expression(Y[t-1]),"loglog"),
pch=".", cex=4 )
## Warning in xy.coords(x, y, xlabel, ylabel, log): 19 x values <= 0 omitted
## from logarithmic plot
## Warning in xy.coords(x, y, xlabel, ylabel, log): 18 y values <= 0 omitted
## from logarithmic plot

#So we perform a shifted log transformation
Raslog <- log(Ra+0.001)
xslog <- Raslog[-length(Raslog)]
yslog <- Raslog[-1]
plot( xslog, yslog,
xlab = expression(Y[t-1]), ylab = expression(Y[t]),
main = paste("shifted log",expression(Y[t]),"vs","shifted log",expression(Y[t-1])),
pch=".", cex=4 )
points(xslog[xslog==log(0.001)],yslog[xslog==log(0.001)],col="red")
points(xslog[yslog==log(0.001)],yslog[yslog==log(0.001)],col="red")

Fit a linear model
#Let's fit a least sum of residual squares regression
m1 <- lm(yslog~xslog)
summary(m1)
##
## Call:
## lm(formula = yslog ~ xslog)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.39111 -0.57949 0.06551 0.60846 2.40513
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.10380 0.10765 -38.120 < 2e-16 ***
## xslog 0.14188 0.02214 6.408 1.83e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8574 on 1996 degrees of freedom
## Multiple R-squared: 0.02016, Adjusted R-squared: 0.01967
## F-statistic: 41.06 on 1 and 1996 DF, p-value: 1.832e-10
#Thought the slope term is significant, its value is quite low which means the linear correlation between consecutive returns is small.
#We can also check the assumption of normal residuals
qqnorm(resid(m1))
qqline(resid(m1))

#Let's try a robust regression, the least median of squares regression
library(MASS)
m2 <- lmsreg(yslog~xslog)
#Thought it is robust method, the LME may give different estimates as its solution is not unique.
#Visualize the fitted model
plot( xslog, yslog,
xlab = expression(Y[t-1]), ylab = expression(Y[t]),
main = paste("shifted log",expression(Y[t]),"vs","shifted log",expression(Y[t-1])),
pch=".", cex=4 )
abline(m1) #Black line: LSE Line
abline(m2,col="red") #Red line: least median of squares regression
legend("bottomright",legend = c("LSE","LME"),col=c("black","red"),lty=c(1,1))

For the test subset
Rtest <- diff( log(test[,6]) )
Rtest <- Rtest[-1,]
Ratest <- abs(Rtest)
Ratest <- as.numeric(Ratest)
xtest <- Ratest[-length(Ratest)]
ytest <- Ratest[-1]
#Three criteria
# predict(m1) will give the fitted values for the training set
#For the test data set
#Mean squared prediction error (MSPE)
sqrt(mean((exp( predict( m1,newdata = data.frame(xslog=log(xtest+0.001)) ) )-0.001-ytest)^2))
## [1] 0.01594851
#Mean absolute errors (MAE)
mean(abs(exp( predict( m1,newdata = data.frame(xslog=log(xtest+0.001)) ) )-0.001-ytest))
## [1] 0.008827386
#Mean absolute percentage error (MAPE)
mean( abs( (exp( predict( m1,newdata = data.frame(xslog=log(xtest+0.001)) ) )-0.001-ytest)/ytest) )
## [1] Inf
#there are 0 values in ytest, so instead of looking at mean, we look at median
summary(ytest)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000000 0.003544 0.007987 0.012770 0.015240 0.156100
median( abs( (exp( predict( m1,newdata = data.frame(xslog=log(xtest+0.001)) ) )-0.001-ytest)/ytest) )
## [1] 0.598594
This is just a sample code for Assignment 1. Feel free to modify the code and make appropriate comments by yourself.