I'm currently implementing the Sequential Deep Matching model (https://arxiv.org/abs/1909.00385) with TensorFlow 2.3, and I've included the preprocessing as part of the model by subclassing keras.layers.Layer.
The preprocessing part of the code is listed below:
import tensorflow as tf
from tensorflow import keras


class Preprocessing(keras.layers.Layer):
    def __init__(self, str_columns, hash_bins, float_columns, float_buckets, embedding_dim, user_columns,
                 short_seq_columns, prefer_seq_columns, item_key_feats, item_key_hash_bucket_size,
                 series_feats, series_feats_hash_bucket_size, deviceid_num, device_list, **kwargs):
        super(Preprocessing, self).__init__(**kwargs)
        self.str_columns = str_columns
        self.hash_bins = hash_bins
        self.float_columns = float_columns
        self.float_buckets = float_buckets
        self.embedding_dim = embedding_dim
        self.user_columns = user_columns
        self.short_seq_columns = short_seq_columns
        self.prefer_seq_columns = prefer_seq_columns
        self.item_key_feats = item_key_feats
        self.item_key_hash_bucket_size = item_key_hash_bucket_size
        self.series_feats = series_feats
        self.series_feats_hash_bucket_size = series_feats_hash_bucket_size
        self.deviceid_num = deviceid_num
        self.device_list = device_list
        self.user_outputs = {}
        self.short_outputs = {}
        self.prefer_outputs = {}

        # Shared lookup/hashing + embedding pairs for the device-id, item-key and series feature groups
        deviceid_lookup = keras.layers.experimental.preprocessing.StringLookup(vocabulary=device_list, mask_token=None, oov_token="-1")
        deviceid_embedding = keras.layers.Embedding(input_dim=deviceid_num, output_dim=embedding_dim)
        item_key_hashing = keras.layers.experimental.preprocessing.Hashing(num_bins=item_key_hash_bucket_size)
        item_key_embedding = keras.layers.Embedding(input_dim=item_key_hash_bucket_size, output_dim=embedding_dim)
        series_hashing = keras.layers.experimental.preprocessing.Hashing(num_bins=series_feats_hash_bucket_size)
        series_embedding = keras.layers.Embedding(input_dim=series_feats_hash_bucket_size, output_dim=embedding_dim)

        # String features: hashing (or vocabulary lookup) followed by an embedding
        for i in str_columns:
            if i == "device_id":
                process = [deviceid_lookup, deviceid_embedding]
            elif i in item_key_feats:
                process = [item_key_hashing, item_key_embedding]
            elif i in series_feats:
                process = [series_hashing, series_embedding]
            else:
                hashing = keras.layers.experimental.preprocessing.Hashing(num_bins=hash_bins[i])
                embedding = keras.layers.Embedding(input_dim=hash_bins[i], output_dim=embedding_dim)
                process = [hashing, embedding]
            if i in user_columns:
                self.user_outputs[i] = process
            if i in short_seq_columns:
                self.short_outputs[i] = process
            if i in prefer_seq_columns:
                self.prefer_outputs[i] = process

        # Float features: bucketize, then embed the bucket index
        for l in float_columns:
            discrete = keras.layers.experimental.preprocessing.Discretization(bins=float_buckets[l])
            embedding = keras.layers.Embedding(input_dim=len(float_buckets[l]) + 1, output_dim=embedding_dim)
            if l in user_columns:
                self.user_outputs[l] = [discrete, embedding]
            if l in short_seq_columns:
                self.short_outputs[l] = [discrete, embedding]
            if l in prefer_seq_columns:
                self.prefer_outputs[l] = [discrete, embedding]

    @staticmethod
    def get_embedding(input_tmp, name, embed_dict):
        # Apply the stored [transform, embedding] pair registered for this feature
        func = embed_dict[name]
        if len(func) < 2:
            print(func)
            raise Exception('Not enough functions to retrieve embedding')
        output = func[0](input_tmp)
        output = func[1](output)
        return output

    def call(self, inputs):
        # Mean-pooled user-profile embeddings, concatenated feature-wise
        user_embedding = tf.concat([tf.reduce_mean(self.get_embedding(inputs[i], i, self.user_outputs), axis=[1, 2]) for i in self.user_columns], axis=-1)
        # Short-term behaviour sequence embeddings, densified and concatenated
        short_embedding = tf.concat([tf.squeeze(self.get_embedding(inputs[l], l, self.short_outputs), axis=1).to_tensor() for l in self.short_seq_columns], axis=-1)
        # Long-term preference embeddings, kept as a per-feature dict
        prefer_embedding = {k: tf.squeeze(self.get_embedding(inputs[k], k, self.prefer_outputs).to_tensor(), axis=1) for k in self.prefer_seq_columns}
        return user_embedding, short_embedding, prefer_embedding
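For context, this layer is used inside the model itself, which is also subclassed; the snippet below is only a simplified sketch of that wiring (the class name SDM and everything after the preprocessing step are placeholders, not my real model code):

class SDM(keras.Model):
    def __init__(self, preprocessing, **kwargs):
        super(SDM, self).__init__(**kwargs)
        self.preprocessing = preprocessing  # the Preprocessing layer defined above

    def call(self, inputs):
        # inputs is the feature dict produced by the input pipeline (see read_row below)
        user_emb, short_emb, prefer_emb = self.preprocessing(inputs)
        # ... the short-term / long-term sequence encoders and fusion gate of the paper go here ...
        return user_emb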
And here is my input code:
from collections import OrderedDict


def read_row(csv_row):
    # defaults: numeric features, then string features, then two trailing string columns
    record_defaults = [[0.]] * numeric_feature_size + [['']] * category_feature_size + [['0-0']] + [['0']]
    row = tf.io.decode_csv(csv_row, record_defaults=record_defaults, field_delim='', use_quote_delim=False)
    features = []
    for i, feature in enumerate(row):
        if i < numeric_feature_size:
            features.append(feature)
        elif i < numeric_feature_size + category_feature_size:
            # multi-valued categorical features are ';'-separated, so split them into a ragged tensor
            tmp_tf = tf.strings.split([feature], ";")
            features.append(tmp_tf)
    res = OrderedDict(zip(numeric_columns + category_columns, features))
    res['target'] = [tf.cast(row[-2], tf.string)]
    return res
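read_row is then mapped over a tf.data pipeline, roughly like this (the file name, batch size and the handling of the 'target' label are simplified placeholders here):

# Minimal sketch of the input pipeline; "train.csv" and the batch size are placeholders.
dataset = (
    tf.data.TextLineDataset("train.csv")
    .map(read_row, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(256)
)
# (the real pipeline also pulls the 'target' column out as the label before model.fit)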
The other parts of the code are not given here, since I believe they are correct and it would be too much to list.
The model trains correctly with model.compile followed by model.fit. However, after I save it with model.save(path), the resulting graph has many unknown inputs, and none of the input names are preserved.
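For reference, the training and saving calls are just the standard Keras flow, with model and dataset as in the sketches above (the optimizer, loss, epoch count and save path are placeholders for my real settings):

model.compile(optimizer="adam", loss="binary_crossentropy")  # placeholder optimizer/loss
model.fit(dataset, epochs=5)                                 # training itself works fine
model.save("./sdm_saved_model")                              # this export is where the names get lost

Inspecting the export with saved_model_cli then shows the following: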
saved_model_cli show --dir ./ --tag_set serve --signature_def serving_default
The given SavedModel SignatureDef contains the following input(s):
inputs['args_0'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0:0
inputs['args_0_1'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_1:0
inputs['args_0_10'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_10:0
inputs['args_0_11'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_11:0
inputs['args_0_12'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_12:0
inputs['args_0_13'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_13:0
inputs['args_0_14'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_14:0
inputs['args_0_15'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_15:0
inputs['args_0_16'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_16:0
inputs['args_0_17'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_17:0
inputs['args_0_18'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_18:0
inputs['args_0_19'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_19:0
inputs['args_0_2'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_2:0
inputs['args_0_20'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_20:0
inputs['args_0_21'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_21:0
inputs['args_0_22'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_22:0
inputs['args_0_23'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_23:0
inputs['args_0_24'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_24:0
inputs['args_0_25'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_25:0
inputs['args_0_26'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_26:0
inputs['args_0_27'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_27:0
inputs['args_0_28'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_28:0
inputs['args_0_29'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_29:0
inputs['args_0_3'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_3:0
inputs['args_0_30'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_30:0
inputs['args_0_31'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_31:0
inputs['args_0_32'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_32:0
inputs['args_0_33'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_33:0
inputs['args_0_34'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_34:0
inputs['args_0_35'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_35:0
inputs['args_0_36'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_36:0
inputs['args_0_37'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_37:0
inputs['args_0_38'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_38:0
inputs['args_0_39'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_39:0
inputs['args_0_4'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_4:0
inputs['args_0_40'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_40:0
inputs['args_0_41'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_41:0
inputs['args_0_42'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_42:0
inputs['args_0_43'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_43:0
inputs['args_0_44'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_44:0
inputs['args_0_45'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_45:0
inputs['args_0_46'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_46:0
inputs['args_0_47'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_47:0
inputs['args_0_48'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_48:0
inputs['args_0_49'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_49:0
inputs['args_0_5'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_5:0
inputs['args_0_50'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_50:0
inputs['args_0_6'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_6:0
inputs['args_0_7'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_7:0
inputs['args_0_8'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: serving_default_args_0_8:0
inputs['args_0_9'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: serving_default_args_0_9:0
The given SavedModel SignatureDef contains the following output(s):
outputs['output_1'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 64)
name: StatefulPartitionedCall:0
In this model I only use the categorical features, whose dtype is tf.string, so none of the inputs with dtype DT_INT64 should be part of my model's inputs.
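What I would like the serving signature to expose is one named string tensor per feature rather than the anonymous args_0_* inputs. Something like the export below is what I have in mind, but I'm not sure it's the right approach; the serve wrapper, the [None]-shaped specs and the use of category_columns as the name list are just my guesses, and the specs would presumably have to match what read_row actually produces:

# Hypothetical export with an explicit, named signature (not verified to work
# with the ragged/split string features my model expects).
@tf.function
def serve(features):
    return model(features)

concrete = serve.get_concrete_function(
    {name: tf.TensorSpec(shape=[None], dtype=tf.string, name=name)
     for name in category_columns}
)

model.save("./sdm_saved_model", signatures={"serving_default": concrete})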
Can anyone help me with this?