I'm trying to write a function in R (a statistical programming language) that would allow me to automate the calculation of a linear regression (lm).
The problem: the regression is selected through the "step" function, so the coefficients that end up in the model cannot be known in advance. Two things need to be automated:
Automate identifying the coefficients selected by the step function.
Look up (VLOOKUP-style) and cross-multiply the second column of the results (the estimates; see e.g. "View(OpenCoefs)") with the last row (the last day) of the respective columns of the original data frame "sp".
The desired solution would be a function that I could call by just typing "run()" and that would return the "y" for each regression, namely the forecast of the S&P 500 index for the following day (Open, Low, High, Close).
The code retrieves its data from the Yahoo Finance website, so it is operational if you run it.
Here's the code.
# Download the daily S&P 500 (^GSPC) price history as CSV from Yahoo Finance.
# NOTE(review): this is a legacy "ichart" endpoint - confirm it still responds
# before relying on the script.
sp <- read.csv(
  "http://ichart.finance.yahoo.com/table.csv?s=%5EGSPC&a=03&b=1&c=1940&d=03&e=1&f=2014&g=d&ignore=.csv"
)

# The adjusted close is not used anywhere below, so drop that column.
sp[["Adj.Close"]] <- NULL

# Reverse the row order. Presumably the feed is newest-first and the rest of
# the script wants oldest-first so that row i - 1 is the previous day - verify.
sp <- sp[seq(nrow(sp), 1), ]
# Day-over-day relative change of a numeric vector.
#
# For i >= 2 the result is x[i] / x[i - 1] - 1; the first element is NA
# because it has no predecessor. The result always has the same length as
# `x`, including for empty and single-element input.
relative_change <- function(x) {
  n <- length(x)
  # x[-1] is "today", x[-n] is "yesterday"; the final subscript trims the
  # NA padding back to length n when x is empty.
  c(NA_real_, x[-1] / x[-n] - 1)[seq_len(n)]
}

# Add one growth column (Gr_<name>) per price/volume column of `sp`.
# Computing whole columns at once avoids writing the data frame one cell at
# a time, which is very slow on a long price history.
for (source_col in c("Open", "High", "Low", "Close", "Volume")) {
  sp[[paste0("Gr_", source_col)]] <- relative_change(sp[[source_col]])
}
# Running row number, stored as a column of `sp`.
sp[["nRows_in_sp"]] <- seq_len(nrow(sp))

# Trailing mean with a warm-up period.
#
# Arguments:
#   x        numeric vector, ordered oldest to newest.
#   lookback number of earlier rows included with the current one, so each
#            window covers rows (i - lookback):i, i.e. lookback + 1 values.
#   warmup   rows 2..warmup get the placeholder 0 instead of a mean.
#
# Returns a numeric vector as long as `x`: NA for row 1, 0 for rows
# 2..warmup, and mean(x[(i - lookback):i]) for every later row i.
#
# NOTE(review): the default warm-up (1000 rows) is much longer than the
# window (101 rows) and is filled with 0 rather than NA; both choices are
# kept as they were - confirm they are intended.
trailing_mean_after_warmup <- function(x, lookback = 100, warmup = 1000) {
  # A warm-up shorter than the window would index before the first row.
  stopifnot(lookback >= 0, warmup >= lookback)
  n <- length(x)
  rows <- seq_len(n)
  out <- rep(NA_real_, n)
  out[rows >= 2 & rows <= warmup] <- 0
  ready <- rows[rows > warmup]
  out[ready] <- vapply(
    ready,
    function(i) mean(x[(i - lookback):i]),
    numeric(1)
  )
  out
}

# Output column -> source column. Every output goes through the same helper,
# so all eight columns share identical warm-up handling.
rolling_mean_sources <- c(
  Open_Rollin = "Open",
  Close_Rollin = "Close",
  Low_Rollin = "Low",
  High_Rollin = "High",
  Open_GR_Rollin = "Gr_Open",
  Close_GR_Rollin = "Gr_Close",
  Low_GR_Rollin = "Gr_Low",
  High_GR_Rollin = "Gr_High"
)

for (target_col in names(rolling_mean_sources)) {
  sp[[target_col]] <- trailing_mean_after_warmup(
    sp[[rolling_mean_sources[[target_col]]]]
  )
}
# Trailing standard deviation.
#
# Arguments:
#   x        numeric vector, ordered oldest to newest.
#   lookback number of earlier rows included with the current one, so each
#            window covers rows (i - lookback):i, i.e. lookback + 1 values.
#
# Returns a numeric vector as long as `x`: NA for the first `lookback` rows
# (no full window yet) and sd(x[(i - lookback):i]) for every later row i.
trailing_sd <- function(x, lookback = 100) {
  stopifnot(lookback >= 0)
  rows <- seq_along(x)
  out <- rep(NA_real_, length(x))
  ready <- rows[rows > lookback]
  out[ready] <- vapply(
    ready,
    function(i) sd(x[(i - lookback):i]),
    numeric(1)
  )
  out
}

# Add <name>_SD_Rollin for each price column. Whole columns are computed at
# once instead of writing the data frame one cell at a time.
for (price_col in c("Open", "Close", "Low", "High")) {
  sp[[paste0(price_col, "_SD_Rollin")]] <- trailing_sd(sp[[price_col]])
}
# ---- Lagged predictors ----

# The four price series that are forecast, in the order the model lists them.
price_cols <- c("Open", "Low", "High", "Close")

# Columns whose previous-day value is offered to the model as a predictor.
# The order here is the order of the terms in the regression formula.
lag_sources <- c(
  price_cols,
  paste0("Gr_", price_cols),
  "Gr_Volume",
  paste0(price_cols, "_GR_Rollin"),
  paste0(price_cols, "_SD_Rollin")
)
lag_predictors <- paste0(lag_sources, "lag")

# Shift a vector down by one position: element i of the result is x[i - 1],
# and the first element is NA. Works for empty and single-element input.
lag_by_one <- function(x) {
  c(NA, x)[seq_along(x)]
}

# Store every lag as a column of `sp` (named <source>lag) so the models can
# take their data from the data frame rather than from loose global vectors.
for (source_col in lag_sources) {
  sp[[paste0(source_col, "lag")]] <- lag_by_one(sp[[source_col]])
}

# ---- Stepwise regressions ----

# step() stops with "number of rows in use has changed" if dropping a term
# changes which rows contain NA. Fitting on rows that are complete for every
# model variable keeps the row set fixed during the search; these are the
# same rows lm() would keep for the full model through its default NA
# handling, provided the four price columns themselves contain no NA.
sp_model_rows <- sp[complete.cases(sp[, c(price_cols, lag_predictors)]), ]

# Fit `response` on all lagged predictors, let step() search in both
# directions starting from that full model, and return the coefficient table
# (estimate, standard error, t value, p value) of the selected model.
#
# Arguments:
#   response  name of the column of `sp_model_rows` to predict.
stepwise_coefs <- function(response) {
  full_fit <- lm(
    reformulate(lag_predictors, response = response),
    data = sp_model_rows
  )
  # test = "F" is forwarded by step() to add1()/drop1(); it only adds F tests
  # to the printed trace.
  selected_fit <- step(full_fit, direction = "both", test = "F")
  coefficients(summary(selected_fit))
}

OpenCoefs <- stepwise_coefs("Open")
LowCoefs <- stepwise_coefs("Low")
HighCoefs <- stepwise_coefs("High")
CloseCoefs <- stepwise_coefs("Close")
# Open the coefficient table of each selected model in the data viewer
# (interactive sessions only).
View(OpenCoefs)
View(LowCoefs)
View(HighCoefs)
View(CloseCoefs)
# Also show the full data frame with all derived columns.
View(sp)