I am building a keras model. The features are coming from pandas.DataFrame. I build the tf.Dataset through from_generator API. I followed this page to process the categorical string features.
output_sig= ...
features = [...]
def iter_to_gen(it):
def f():
for x in it:
# x is a list, with the last element being the label
key_to_feature = {key: x[i] for i, key in enumerate(features)}
yield key_to_feature, x[-1]
return f
train_ds = tf.data.Dataset.from_generator( iter_to_gen(map(tuple, train_data.values)), output_signature=output_sig, name='train').batch(batch_size)
def get_category_encoding_layer(name, dataset, dtype, max_tokens=None):
# Create a layer that turns strings into integer indices.
if dtype == 'string':
index = layers.StringLookup(max_tokens=max_tokens)
# Otherwise, create a layer that turns integer values into integer indices.
else:
index = layers.IntegerLookup(max_tokens=max_tokens)
# Prepare a `tf.data.Dataset` that only yields the feature.
feature_ds = dataset.map(lambda x, y : x[name])
# Learn the set of possible values and assign them a fixed integer index.
index.adapt(feature_ds)
# Encode the integer indices.
encoder = layers.CategoryEncoding(num_tokens=index.vocabulary_size())
# Apply multi-hot encoding to the indices. The lambda function captures the
# layer, so you can use them, or include them in the Keras Functional model later.
return lambda feature: encoder(index(feature))
all_inputs = []
encoded_features = []
categorical_cols = ['feature_A']
for header in categorical_cols:
if header == 'feature_A':
categorical_col = tf.keras.Input(shape=(None,), name=header, dtype='string')
else:
categorical_col = tf.keras.Input(shape=(1,), name=header, dtype='string')
encoding_layer = get_category_encoding_layer(name=header,
dataset=train_ds,
dtype='string',
max_tokens=50) # tune the max tokens
encoded_categorical_col = encoding_layer(categorical_col)
all_inputs.append(categorical_col)
encoded_features.append(encoded_categorical_col)
all_features = tf.keras.layers.concatenate(encoded_features)
print(all_features.shape)
x = tf.keras.layers.Dense(32, activation="relu")(all_features)
# x = tf.keras.layers.Dropout(0.5)(x)
output = tf.keras.layers.Dense(num_class)(x)
model = tf.keras.Model(all_inputs, output)
model.compile(optimizer='SGD',
loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
metrics=["accuracy"],
run_eagerly=True)
model.fit(train_ds, epochs=10, verbose=2) <------ ValueError: Unexpected result of #`train_function` (Empty logs). Please use `Model.compile(..., run_eagerly=True)`, or #`tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a #issue/bug to `tf.keras`.
And then if I reproduce the train_ds
and skip directly to run model.fit
, it would run only 2 epochs and end. I am wondering why is it.
Epoch 1/10
4984/4984 - 71s - loss: 2.5564 - accuracy: 0.4191 - 71s/epoch - 14ms/step
Epoch 2/10
4984/4984 - 0s - loss: 0.0000e+00 - accuracy: 0.0000e+00 - 12ms/epoch - 2us/step
<keras.callbacks.History at 0x....>
I found the first error was raised because model.fit
got a empty dataset. I also verified the size of dataset by dataset.as_numpy_array() and it is empty. I am wondering why.
Thanks.