I am trying to build an LSTM model for multi-step prediction. My data is a time series of parking occupancy rates sampled every five minutes (I have 25 weeks of samples). I started with the code below:
import numpy as np
# Split point: the first 90% of the rows (rounded up) are used for training.
training_data_len = int(np.ceil(0.90 * len(data)))
train_data = data.iloc[:training_data_len, :]
print(len(train_data))
# Every row after the split point is held out as the testing set.
test_data = data.iloc[training_data_len:, :]
print(len(test_data))
# NumPy copy of the training rows, used as input for the windowing step.
data_train = np.array(train_data)
def split_sequence(sequence, n_steps_in, n_steps_out):
    """Cut a sequence into overlapping (input window, output window) pairs.

    Window ``i`` takes ``sequence[i:i + n_steps_in]`` as input and the
    ``n_steps_out`` items that immediately follow it as target. Windows
    advance one step at a time and only complete pairs are produced.

    Args:
        sequence: Sliceable sequence (list, NumPy array, ...) supporting len().
        n_steps_in: Number of items in each input window (expected positive).
        n_steps_out: Number of items in each output window (expected positive).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the input windows and the
        matching output windows. Both are empty when the sequence is shorter
        than ``n_steps_in + n_steps_out``.
    """
    # Number of start positions for which both windows fit in the sequence;
    # range() yields nothing when this is zero or negative.
    n_windows = len(sequence) - (n_steps_in + n_steps_out) + 1
    X, y = [], []
    for start in range(n_windows):
        end_ix = start + n_steps_in
        out_end_ix = end_ix + n_steps_out
        X.append(sequence[start:end_ix])
        y.append(sequence[end_ix:out_end_ix])
    # np.array: the bare name `array` is not imported alongside `numpy as np`.
    return np.array(X), np.array(y)
# Window the training series: 6 past samples (30 minutes) as input and the
# following 6 samples (30 minutes) as target.
X_train, y_train = split_sequence(data_train, 6, 6)
# Stacked-LSTM regressor that predicts every forecast step of a window at once.

# Flatten each target window into a single vector so it lines up with the
# Dense output below. Assumes y_train is (samples, steps_out, features), which
# is what windowing a 2-D array produces -- confirm for your data.
y_train_flat = y_train.reshape(len(y_train), -1)

reg = Sequential()
# The input shape is (timesteps, features) per sample; take it from the windows
# themselves so it always matches what split_sequence produced.
reg.add(LSTM(units=200, return_sequences=True, input_shape=X_train.shape[1:]))
reg.add(Dropout(0.2))
reg.add(LSTM(units=200, return_sequences=True))
reg.add(Dropout(0.2))
# The last recurrent layer returns only its final state, so the Dense layer
# emits one forecast vector per sample instead of one vector per timestep.
reg.add(LSTM(units=200))
reg.add(Dropout(0.2))
# One output unit per value in a flattened target window.
reg.add(Dense(y_train_flat.shape[1]))
# Mean squared error loss with the Adam optimizer.
reg.compile(loss='mse', optimizer='adam')
# Train the model.
reg.fit(X_train, y_train_flat, epochs=10, verbose=1)
# NumPy copy of the held-out rows.
data_test = np.array(test_data)
# Window the test series the same way as the training series:
# 6 input samples followed by 6 target samples.
X_test, y_test = split_sequence(data_test, 6, 6)
# Run the fitted model on the test input windows.
y_pred = reg.predict(X_test)
My goal is to use the past 30 minutes (6 samples = 30 minutes) to predict the next 30 minutes (6 samples = 30 minutes).
I'm new to this kind of model, and I would like to know whether my approach is sound, or whether I am missing something or could improve it. Thank you.