I have a working RNN that uses the default softmax loss function of tf.contrib.seq2seq.sequence_loss() (which I'm assuming is tf.nn.softmax()), but I would instead like to use tf.nn.softmax_cross_entropy_with_logits(). According to the seq2seq.sequence_loss documentation, one may pass softmax_loss_function= to override the default loss function:
softmax_loss_function: Function (labels, logits) -> loss-batch to be used instead of the standard softmax (the default if this is None). Note that to avoid confusion, it is required for the function to accept named arguments.
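I'm not sure I'm reading that signature correctly, but my best guess at what such a callable would look like is the sketch below (the wrapper name, the one-hot step, and the assumption that sequence_loss hands it flattened labels and logits are all mine, not from the docs):

import tensorflow as tf

def custom_softmax_loss(labels, logits):
    # Sketch of a "(labels, logits) -> loss-batch" callable for sequence_loss.
    # labels: integer class ids (presumably flattened to [batch_size * max_time])
    # logits: unscaled scores (presumably [batch_size * max_time, vocab_size])
    # softmax_cross_entropy_with_logits wants dense labels, so one-hot encode the ids first.
    one_hot_labels = tf.one_hot(labels, depth=tf.shape(logits)[1])
    # Return one loss value per example, matching the "loss-batch" wording in the docstring.
    return tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits)

Presumably this would then be passed as the function object itself, i.e. softmax_loss_function=custom_softmax_loss, but I may be misreading the docs.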
Here is my code that works:
import tensorflow as tf
from tensorflow.python.layers.core import Dense

# Build the graph
train_graph = tf.Graph()
# Set the graph to default to ensure that it is ready for training
with train_graph.as_default():

    # Load the model inputs
    input_data, targets, keep_prob, lr, target_sequence_length, max_target_sequence_length, source_sequence_length \
        = get_model_inputs()

    # Create the training and inference logits
    training_decoder_output, inference_decoder_output = seq2seq_model(input_data,
                                                                      targets,
                                                                      lr,
                                                                      target_sequence_length,
                                                                      max_target_sequence_length,
                                                                      source_sequence_length,
                                                                      len(source_letter_to_int),
                                                                      len(target_letter_to_int),
                                                                      encoding_embedding_size,
                                                                      decoding_embedding_size,
                                                                      rnn_size,
                                                                      num_layers,
                                                                      keep_prob)

    # Create tensors for the training logits and inference logits
    training_logits = tf.identity(training_decoder_output.rnn_output, 'logits')
    inference_logits = tf.identity(inference_decoder_output.sample_id, name='predictions')

    # Create the weights for sequence_loss
    masks = tf.sequence_mask(target_sequence_length, max_target_sequence_length, dtype=tf.float32, name='masks')

    with tf.name_scope("optimization"):

        # Loss function
        cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks)

        # Optimizer
        optimizer = tf.train.AdamOptimizer(lr)

        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

    # Add variables to collection in order to load them up when retraining a saved graph
    tf.add_to_collection("cost", cost)
    tf.add_to_collection("train_op", train_op)
My attempt to change the loss function is as follows (I've only included the code that differs):
with tf.name_scope("optimization"):

    # One-hot encode targets and reshape to match logits, one row per batch_size per step
    y_one_hot = tf.one_hot(targets, len(target_letter_to_int))
    y_reshaped = tf.reshape(y_one_hot, [batch_size, len(target_letter_to_int), 30])

    # Loss function
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=training_logits, labels=y_reshaped)
    loss = tf.reduce_mean(loss)
    cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks, softmax_loss_function=loss)
The line cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks, softmax_loss_function=loss) now gives me "TypeError: 'Tensor' object is not callable." This is one of the most opaque errors I've seen TensorFlow produce, and I haven't found much explanation for it online. Any help would be appreciated.