0

How do you save a tensorflow keras model to disk in h5 format when the model is trained in the scikit learn pipeline fashion? I am trying to follow this example but not having any luck.

This works to train the models:

import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import os
import joblib


# Name of the target column: it is dropped from the feature matrix and used
# as the regression target further down in this script.
ELECTRIC_POINT = 'total_main_kw'


# clean dataset
def clean_dataset(df):
    """Return a float64 copy of *df* without rows holding NaN or +/-inf.

    Args:
        df: The ``pd.DataFrame`` to clean. It is not modified.

    Returns:
        A new DataFrame containing only the rows of *df* in which no value
        is missing or infinite, with every column cast to ``np.float64``.

    Raises:
        AssertionError: If *df* is not a ``pd.DataFrame``.
    """
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    # dropna() without inplace=True so the caller's frame is left untouched.
    without_nan = df.dropna()
    # `axis` must be passed by keyword: pandas 2.0 no longer accepts it
    # positionally in DataFrame.any().
    has_bad_value = without_nan.isin([np.nan, np.inf, -np.inf]).any(axis=1)
    print('dataset has been cleaned')
    return without_nan[~has_bad_value].astype(np.float64)

def my_model(input_shape):
    """Build and compile the feed-forward regression network.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Dense layer.

    Returns:
        A compiled ``Sequential`` model with Dense layers of 22, 14, 8 and 1
        units, using mean squared error loss and the Adam optimizer.
    """
    # Three ReLU hidden layers followed by a single-unit output layer that is
    # given no activation argument.
    layers = [
        Dense(22, input_shape=input_shape, kernel_initializer='normal', activation='relu'),
        Dense(14, activation='relu'),
        Dense(8, activation='relu'),
        Dense(1, kernel_initializer='normal'),
    ]
    network = Sequential(layers)
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

# load data: the first CSV column is used as the index and parsed as dates
df = pd.read_csv('./all_data.csv', index_col=[0], parse_dates=True)
df = clean_dataset(df)


# shuffle the DataFrame rows
df = df.sample(frac=1)

# Split into feature matrix and target vector. `columns=` is used because
# DataFrame.drop no longer accepts `axis` positionally as of pandas 2.0.
X = np.array(df.drop(columns=[ELECTRIC_POINT]))
Y = np.array(df[ELECTRIC_POINT])

# set the input shape (one entry per feature column)
input_shape = (X.shape[1],)
print(f'Feature shape: {input_shape}')

# define the Keras model: standardize the features, then fit the wrapped network
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=my_model, input_shape=input_shape, epochs=100, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)

# define number of models to train
# input_shape is a 1-tuple, so index it: int() on the 1-element array that
# np.sqrt(input_shape) returns is deprecated since NumPy 1.25.
# KFold requires at least 2 splits, hence the lower bound.
num_models = max(2, int(np.sqrt(input_shape[0])))
print(f'Number of models: {num_models}')

# define k-fold cross-validation
kfold = KFold(n_splits=num_models)

# Callbacks handed to every fit: stop when val_loss has not improved for 10
# epochs, and write the model to ./model.h5 only when val_loss improves.
callbacks = [
    EarlyStopping(patience=10, monitor='val_loss'),
    ModelCheckpoint(
        monitor='val_loss',
        save_best_only=True,
        filepath=os.path.join(os.path.curdir, 'model.h5'),
    ),
]

# evaluate the model using cross-validation with callbacks
results = []
for train_idx, test_idx in kfold.split(X, Y):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = Y[train_idx], Y[test_idx]

    # Fit parameters routed with the `mlp__` prefix go straight to the last
    # step and are NOT transformed by 'standardize'. Scale the validation
    # features by hand with a scaler fitted on the training fold, so val_loss
    # (which drives both callbacks) is computed on the same scale as training.
    X_val = StandardScaler().fit(X_train).transform(X_test)
    pipeline.fit(X_train, y_train, mlp__validation_data=(X_val, y_test), mlp__callbacks=callbacks)

    # pipeline.score() returns R^2 for KerasRegressor, not the MSE, so compute
    # the mean squared error explicitly from the predictions.
    y_pred = np.ravel(pipeline.predict(X_test))
    mse = float(np.mean((y_pred - y_test) ** 2))
    print(f'MSE this round: {mse}')
    results.append(mse)


# report performance (mean and standard deviation across folds)
print("MSE: %.2f (%.2f)" % (np.mean(results), np.std(results)))

# compare report performance to electricity summary stats
print(df[ELECTRIC_POINT].agg([np.min,np.max,np.mean,np.median]))

If I extract the final step with `model_step = pipeline.steps.pop(-1)[1]` and print `model_step`, this is what I get:

KerasRegressor(
    model=None
    build_fn=<function my_model at 0x0000029156028CA0>
    warm_start=False
    random_state=None
    optimizer=rmsprop
    loss=None
    metrics=None
    batch_size=5
    validation_batch_size=None
    verbose=0
    callbacks=None
    validation_split=0.0
    shuffle=True
    run_eagerly=False
    epochs=100
    input_shape=(27,)
)

I can then save the pipeline to a pickle file. This works, and I get `pipeline.pkl` in my current directory:

# Serialize the pipeline object to ./pipeline.pkl with joblib.
# NOTE(review): if the last step was already removed with
# `pipeline.steps.pop(-1)`, the dumped pipeline no longer contains the
# Keras step -- confirm that this is intended.
pipeline_path = os.path.join(os.path.curdir, 'pipeline.pkl')
joblib.dump(pipeline, pipeline_path)

But trying to run keras save_model:

# NOTE(review): `model_step.model` is the constructor argument, which is None
# here because the network was supplied through `build_fn`. scikeras exposes
# the fitted Keras model as `model_step.model_` (trailing underscore), so
# passing `.model` hands None to save_model and triggers the AttributeError
# shown below.
models.save_model(model_step.model, os.path.join(os.path.curdir, 'model.h5'))

I get an error:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_27056\3991387177.py in <module>
----> 1 models.save_model(model_step.model, os.path.join(os.path.curdir, 'model.h5'))

~\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
     68             # To get the full stack trace, call:
     69             # `tf.debugging.disable_traceback_filtering()`
---> 70             raise e.with_traceback(filtered_tb) from None
     71         finally:
     72             del filtered_tb

~\Anaconda3\lib\site-packages\keras\saving\legacy\saving_utils.py in try_build_compiled_arguments(model)
    349     if (
    350         not version_utils.is_v1_layer_or_model(model)
--> 351         and model.outputs is not None
    352     ):
    353         try:

AttributeError: 'NoneType' object has no attribute 'outputs'
desertnaut
  • 57,590
  • 26
  • 140
  • 166
bbartling
  • 3,288
  • 9
  • 43
  • 88
  • You should [edit] your question to include the full error traceback, as that contains valuable information about the issue – G. Anderson Apr 11 '23 at 20:32

1 Answer

0

If I'm correct (maybe not), your model_step is a KerasRegressor object.

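And when you print the model_step, it seems that its `model` property is None. In scikeras, `model` only holds the constructor argument; the fitted Keras model is stored in the `model_` attribute (with a trailing underscore), so try `models.save_model(model_step.model_, ...)` instead.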

trgiangdo
  • 76
  • 2
  • If I change the last line to `model_step.save_model(model_step.model, os.path.join(os.path.curdir, 'model.h5'))`, it errors out with `AttributeError: 'KerasRegressor' object has no attribute 'save_model'`. Would you have any ideas to try? – bbartling Apr 12 '23 at 13:23