I have a training dataset of length 8000 with 4 features. With a window size of 5, the output of my LSTM autoencoder has shape (7996, 5, 4), but I need it in shape (7996, 4) to compare it with the input. What can I do?
Ultimately I want to plot the MAE loss distribution as a histplot; any suggestions are welcome. My code and the predicted output are below, followed by a sketch of what I was planning to try.
# import libraries
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import joblib
import seaborn as sns
sns.set(color_codes=True)
import matplotlib.pyplot as plt
%matplotlib inline
from numpy.random import seed
from tensorflow.random import set_seed
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)  # silence TF warnings
# use the Keras bundled with TensorFlow; mixing standalone keras with tf can break
from tensorflow.keras.layers import Input, Dense, LSTM, TimeDistributed, RepeatVector
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers
from google.colab import files
uploaded = files.upload()
# set random seed
seed(10)
set_seed(10)
df = pd.read_csv("envdataset.csv")
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
df1 = df.drop(['ID'], axis=1)
df1.set_index('Date', inplace=True)
df1.rename_axis(None, inplace=True)  # remove the 'Date' axis name
print(df1)
df1_train, df1_test = df1[df1.index < '2018-02-18 20:00:00'], df1[df1.index >= '2018-02-18 20:00:00']
print("Training dataset shape:", df1_train.shape)
print("Test dataset shape:", df1_test.shape)
# scale each feature to (-1, 1) with its own scaler; work on a copy so the
# original split is not modified in place
train = df1_train.copy()
scalers = {}
for i in train.columns:
    scaler = MinMaxScaler(feature_range=(-1, 1))
    s_s = scaler.fit_transform(train[i].values.reshape(-1, 1))
    scalers['scaler_' + i] = scaler
    train[i] = s_s.reshape(len(s_s))
# apply the train-fitted scalers to the test split
test = df1_test.copy()
for i in test.columns:
    scaler = scalers['scaler_' + i]
    s_s = scaler.transform(test[i].values.reshape(-1, 1))
    test[i] = s_s.reshape(len(s_s))
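# Side note: the fitted scalers are kept so that predictions can later be
# mapped back to original units. A rough sketch (illustrative only --
# `preds` is a hypothetical (n_samples, 4) array aligned with train.columns):
# for k, col in enumerate(train.columns):
#     preds[:, k] = scalers['scaler_' + col].inverse_transform(
#         preds[:, k].reshape(-1, 1)).reshape(-1)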
def split_series(series, n_past, n_future):
    """Slide a window over `series`: X gets n_past past observations,
    y gets the n_future observations that follow."""
    X, y = list(), list()
    for window_start in range(len(series)):
        past_end = window_start + n_past
        future_end = past_end + n_future
        if future_end > len(series):
            break
        # slice the past and future parts of the window
        past, future = series[window_start:past_end, :], series[past_end:future_end, :]
        X.append(past)
        y.append(future)
    return np.array(X), np.array(y)
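# Illustrative check of the windowing: split_series yields
# len(series) - n_past - n_future + 1 windows, e.g.
# _X, _y = split_series(np.zeros((8000, 4)), 5, 1)
# print(_X.shape, _y.shape)  # -> (7995, 5, 4) (7995, 1, 4)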
n_past = 5
n_future = 1
n_features = 4  # the dataset has 4 feature columns, not 5
X_train, Y_train = split_series(train.values, n_past, n_future)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], n_features))
Y_train = Y_train.reshape((Y_train.shape[0], Y_train.shape[1], n_features))
X_test, Y_test = split_series(test.values, n_past, n_future)
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], n_features))
Y_test = Y_test.reshape((Y_test.shape[0], Y_test.shape[1], n_features))
# define the autoencoder network model
def autoencoder_model(X):
    inputs = Input(shape=(X.shape[1], X.shape[2]))
    L1 = LSTM(32, activation='relu', return_sequences=True,
              kernel_regularizer=regularizers.l2(0.01))(inputs)
    L2 = LSTM(16, activation='relu', return_sequences=False)(L1)
    L3 = RepeatVector(X.shape[1])(L2)  # repeat the encoding for each timestep
    L4 = LSTM(16, activation='relu', return_sequences=True)(L3)
    L5 = LSTM(32, activation='relu', return_sequences=True)(L4)
    output = TimeDistributed(Dense(X.shape[2]))(L5)
    model = Model(inputs=inputs, outputs=output)
    return model
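# Note: the decoder's TimeDistributed(Dense(...)) emits one feature vector per
# timestep, so the model output has shape (batch, n_past, n_features) -- the
# same shape as the input windows, which is what lets the reconstruction
# error be computed element-wise later.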
# create the autoencoder model
model = autoencoder_model(X_train)
model.compile(optimizer='adam', loss='mae')
model.summary()
# fit the model; an autoencoder reconstructs its own input, so the target is
# X_train itself (Y_train has shape (N, 1, 4) and would not match the
# (N, 5, 4) model output)
nb_epochs = 15
batch_size = 128
history = model.fit(X_train, X_train, epochs=nb_epochs, batch_size=batch_size,
                    validation_split=0.05).history
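# Optional: inspect the training curve from the history dict (illustrative,
# uses the matplotlib import from above):
# plt.plot(history['loss'], label='train')
# plt.plot(history['val_loss'], label='validation')
# plt.xlabel('epoch'); plt.ylabel('MAE loss'); plt.legend(); plt.show()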
# reconstructions for the training and test windows
X_pred = model.predict(X_train)
test_pred = model.predict(X_test)
print(X_pred.shape)  # -> (7996, 5, 4)
[[[-1.9560766 -2.8735816 -2.181508 0.12179315]
[-1.1410213 -2.9582233 -1.5174161 -0.70458966]
[-1.0294746 -3.2466424 -1.2022992 -0.3793367 ]
[-0.98699576 -3.054132 -1.1821653 -0.4421966 ]
[-0.99007565 -3.191211 -1.1206213 -0.29086077]]
[[-1.9562337 -2.8735654 -2.1815956 0.1217528 ]
[-1.1411219 -2.9582226 -1.5174775 -0.704654 ]
[-1.029608 -3.2467287 -1.2023845 -0.37945104]
[-0.9871716 -3.054176 -1.182281 -0.4423707 ]
[-0.9902895 -3.1912518 -1.120732 -0.29105723]]
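What I was planning to try is sketched below, though I am not sure which way of collapsing the window axis is appropriate (and histplot needs seaborn >= 0.11; older versions only have distplot):

# collapse the 5-step window axis to get a (7996, 4) reconstruction:
recon_last = X_pred[:, -1, :]     # keep the last timestep of each window
recon_mean = X_pred.mean(axis=1)  # or average over the 5 timesteps

# one MAE value per window, averaged over timesteps and features
mae_loss = np.mean(np.abs(X_pred - X_train), axis=(1, 2))  # shape (7996,)

# plot the loss distribution
sns.histplot(mae_loss, bins=50, kde=True)
plt.xlabel('MAE loss')
plt.show()

Is this the right way to get the (7996, 4) comparison and the MAE histogram?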