I wrote the following code for prediction of the stock market and it is working properly when I use the Adjusted Close Price as input and try to predict the next day Adjusted Close Price. The file "VALE3.SA.csv" is the historical data from Yahoo Finance from 01/01/2018 to 09/30/2022.
library("rnn")
library("dplyr")
data <- read.csv("VALE3.SA.csv")
plot(data$Adj.Close)
fechamento_adj <- data$Adj.Close
fechamento_adj_anterior <- lag(fechamento_adj,n=1L)
data_analise <- data.frame(fechamento_adj)
data_analise$fechamento_adj_anterior <- fechamento_adj_anterior
data_analise <- na.omit(data_analise)
x <- data_analise[,2]
y <- data_analise[,1]
X <- matrix(x, nrow = 168)
Y <- matrix(y, nrow = 168)
Yscaled <- (Y - min(Y)) / (max(Y) - min(Y))
Xscaled <- (X - min(X)) / (max(X) - min(X))
Y <- Yscaled
X <- Xscaled
train=1:5
test=6:7
model <- trainr(Y = Y[,train],
X = X[,train],
learningrate = 0.05,
hidden_dim = 15,
numepochs = 1000,
network_type = "rnn"
)
Ytrain <- t(matrix(predictr(model, X[,train]),nrow=1))
Yreal <- t(matrix(Y[,train],nrow=1))
rsq <- function(y_actual,y_predict){
cor(y_actual,y_predict)^2
}
rsq(Yreal,Ytrain)
Ytest=matrix(Y[,test], nrow = 1)
Ytest = t(Ytest)
Yp <- predictr(model, Y[,test])
Ypredicted=matrix(Yp, nrow = 1)
Ypredicted=t(Ypredicted)
result_data <- data.frame(Ytest)
result_data$Ypredicted <- Ypredicted
rsq(result_data$Ytest,result_data$Ypredicted)
mean(result_data$Ytest)
mean(result_data$Ypredicted)
The problem is when I tried to utilize two inputs (Adjusted Close price and volume). I tried the following code. The array X at the "trainr" function looks like wrong and also I am getting the error at "Yp <- predictr(model, Y[,test])" Error in x %*% model$time_synapse[[i]] : non-conformable arguments
library("rnn")
library("dplyr")
data <- read.csv("VALE3.SA.csv")
fechamento_adj <- data$Adj.Close
volume <- data$Volume
fechamento_adj_anterior <- lag(fechamento_adj,n=1L)
volume_anterior <- lag(volume,n=1L)
data_analise <- data.frame(fechamento_adj)
data_analise$fechamento_adj_anterior <- fechamento_adj_anterior
data_analise$volume_anterior <- volume_anterior
#exclui NA
data_analise <- na.omit(data_analise)
#PEGAR A SEGUNDA COLUNA COMO INPUT
x1 <- data_analise[,2]
x2 <- data_analise[,3]
#PEGAR A PRIMEIRA COLUNA COMO TARGET
y <- data_analise[,1]
X1 <- matrix(x1, nrow = 168)
X2 <- matrix(x2, nrow = 168)
Y <- matrix(y, nrow = 168)
Yscaled <- (Y - min(Y)) / (max(Y) - min(Y))
X1scaled <- (X1 - min(X1)) / (max(X1) - min(X1))
X2scaled <- (X2 - min(X2)) / (max(X2) - min(X2))
Y <- Yscaled
X1 <- X1scaled
X2 <- X2scaled
X <- array( c(X1,X2), dim=c(dim(X1),2) )
train=1:5
test=6:7
model <- trainr(Y = Y[,train],
X = X[,train,],
learningrate = 0.05,
hidden_dim = 15,
numepochs = 10,
network_type = "rnn"
)
#no conjunto de treinamento
Ytrain <- t(matrix(predictr(model, X[,train,]),nrow=1))
Yreal <- t(matrix(Y[,train],nrow=1))
#Percentual de variação em uma variável explicada por outra
rsq <- function(y_actual,y_predict){
cor(y_actual,y_predict)^2
}
rsq(Yreal,Ytrain)
#no conjunto de teste
Ytest=matrix(Y[,test], nrow = 1)
Ytest = t(Ytest)
Yp <- predictr(model, Y[,test])
Ypredicted=matrix(Yp, nrow = 1)
Ypredicted=t(Ypredicted)
result_data <- data.frame(Ytest)
result_data$Ypredicted <- Ypredicted
rsq(result_data$Ytest,result_data$Ypredicted)
mean(result_data$Ytest)
mean(result_data$Ypredicted)