How do you save a TensorFlow Keras model to disk in HDF5 (.h5) format when the model is trained inside a scikit-learn Pipeline? I am trying to follow this example but not having any luck.
This works to train the models:
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import os
import joblib
ELECTRIC_POINT = 'total_main_kw'
# drop rows containing NaN or infinite values and cast everything to float64
def clean_dataset(df):
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    df.dropna(inplace=True)
    indices_to_keep = ~df.isin([np.nan, np.inf, -np.inf]).any(axis=1)
    print('dataset has been cleaned')
    return df[indices_to_keep].astype(np.float64)
def my_model(input_shape):
    # create model
    model = Sequential()
    model.add(Dense(22, input_shape=input_shape, kernel_initializer='normal', activation='relu'))
    model.add(Dense(14, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
# load data
df = pd.read_csv('./all_data.csv', index_col=[0], parse_dates=True)
df = clean_dataset(df)
# shuffle the DataFrame rows
df = df.sample(frac=1)
X = np.array(df.drop(columns=[ELECTRIC_POINT]))
Y = np.array(df[ELECTRIC_POINT])
# set the input shape
input_shape = (X.shape[1],)
print(f'Feature shape: {input_shape}')
# define the Keras model
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=my_model, input_shape=input_shape, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
# define number of models to train
num_models = int(np.sqrt(input_shape[0]))
#num_models = 2
print(f'Number of models: {num_models}')
# define k-fold cross-validation
kfold = KFold(n_splits=num_models)
# define early stopping and model checkpoint callbacks
callbacks = [EarlyStopping(monitor='val_loss', patience=10),
             ModelCheckpoint(filepath=os.path.join(os.path.curdir, 'model.h5'),
                             monitor='val_loss', save_best_only=True)]
# evaluate the model using cross-validation with callbacks
results = []
for train_idx, test_idx in kfold.split(X, Y):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = Y[train_idx], Y[test_idx]
    pipeline.fit(X_train, y_train, mlp__validation_data=(X_test, y_test), mlp__callbacks=callbacks)
    mse = pipeline.score(X_test, y_test)
    print(f'MSE this round: {mse}')
    results.append(mse)
# report performance
print("MSE: %.2f (%.2f)" % (np.mean(results), np.std(results)))
# compare report performance to electricity summary stats
print(df[ELECTRIC_POINT].agg(['min', 'max', 'mean', 'median']))
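As a side note, the ModelCheckpoint callback above should already be writing a model.h5 during training. As a sanity check (my assumption, untested), I would expect that file to load back directly:

# assumption: load the checkpoint file written by ModelCheckpoint during training
checkpoint = models.load_model(os.path.join(os.path.curdir, 'model.h5'))
checkpoint.summary()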
If I pop the final step off the pipeline and print it:
model_step = pipeline.steps.pop(-1)[1]
print(model_step)
this returns:
KerasRegressor(
model=None
build_fn=<function my_model at 0x0000029156028CA0>
warm_start=False
random_state=None
optimizer=rmsprop
loss=None
metrics=None
batch_size=5
validation_batch_size=None
verbose=0
callbacks=None
validation_split=0.0
shuffle=True
run_eagerly=False
epochs=100
input_shape=(27,)
)
I can then save the pipeline to a pickle file, which works; I get pipeline.pkl in my current directory:
# save best trained model to file
joblib.dump(pipeline, os.path.join(os.path.curdir, 'pipeline.pkl'))
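For completeness, here is how I would expect to load that pickle back (a sketch, assuming the pipeline still contains the 'mlp' step at the time it is dumped):

# assumption: restore the full pipeline from disk and inspect its steps
restored = joblib.load(os.path.join(os.path.curdir, 'pipeline.pkl'))
print(restored.named_steps)  # should show 'standardize' and 'mlp'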
But when I try to run the Keras save_model function:
models.save_model(model_step.model, os.path.join(os.path.curdir, 'model.h5'))
I get an error:
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_27056\3991387177.py in <module>
----> 1 models.save_model(model_step.model, os.path.join(os.path.curdir, 'model.h5'))

~\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
     68             # To get the full stack trace, call:
     69             # `tf.debugging.disable_traceback_filtering()`
---> 70             raise e.with_traceback(filtered_tb) from None
     71         finally:
     72             del filtered_tb

~\Anaconda3\lib\site-packages\keras\saving\legacy\saving_utils.py in try_build_compiled_arguments(model)
    349     if (
    350         not version_utils.is_v1_layer_or_model(model)
--> 351         and model.outputs is not None
    352     ):
    353         try:

AttributeError: 'NoneType' object has no attribute 'outputs'
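For reference, the SciKeras docs say the fitted network is stored on the wrapper as the model_ attribute (with a trailing underscore) after fit, while model is only the constructor argument, which would match the model=None in the repr above. So my guess (untested sketch) is that the save call should point at model_ instead:

# assumption based on the SciKeras docs: the fitted Keras network lives on model_, not model
models.save_model(model_step.model_, os.path.join(os.path.curdir, 'model.h5'))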