I am relatively new to TensorFlow and am working on relation classification. I will lay out my problem step by step so that it is clear, and I hope someone can point out my mistake (which I am sure must be a silly one):
- For the word embedding layer I needed to initialize a tf.Variable with a tensor larger than 2 GB, which fails because a tf.constant initializer is stored inside the GraphDef and the GraphDef is limited to 2 GB. So I followed the solutions provided here and changed my code.
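As I understand those answers, the idea is to create the variable from a placeholder instead of baking the whole matrix into the graph as a tf.constant. A minimal sketch of that pattern with a toy matrix and illustrative names (not my actual code) would be:

import numpy as np
import tensorflow as tf

# Toy stand-in for my real (2451510, 300) embedding matrix
word_embed_vecs = np.random.rand(10, 4).astype(np.float32)

# Initialize the variable from a placeholder so the large array is never
# serialized into the GraphDef as a constant
embedding_ph = tf.placeholder(tf.float32, shape=word_embed_vecs.shape, name="embedding_ph")
embedding_var = tf.Variable(embedding_ph, trainable=False, name="embeddings")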
Code snippets before the change:
train.py
if FLAGS.model_type == 'cnn':
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = textCNN(
                sequence_length=x_trains[0].shape[1],
                num_classes=num_classes,
                vocab_size=len(word_embed_vecs),
                embedding_size=FLAGS.embedding_dim,
                dist_vocab_size=dist_vocab_size,
                dist_size=FLAGS.pos_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda,
                word_vecs=word_embed_vecs,  # word_embed_vecs is of shape (2451510, 300)
                train_emb=FLAGS.train_emb)
text_cnn.py
class textCNN(object):
    """
    A CNN for text classification.
    Uses an embedding layer, followed by a convolutional, max-pooling and
    softmax layer.
    """
    def __init__(
            self, sequence_length, num_classes, vocab_size,
            embedding_size, dist_vocab_size, dist_size, filter_sizes, num_filters,
            l2_reg_lambda=0.0, word_vecs=None, train_emb=True):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.e1_dist = tf.placeholder(tf.int32, [None, sequence_length], name="e1_dist")
        self.e2_dist = tf.placeholder(tf.int32, [None, sequence_length], name="e2_dist")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/gpu:0'), tf.name_scope("embedding"):
        # with tf.name_scope("embedding"):
            self.W = tf.Variable(
                trainable=train_emb,
                initial_value=tf.constant(word_vecs, dtype=tf.float32), name="W")
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
Code snippets after the change:
train.py
if FLAGS.model_type == 'cnn':
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = textCNN(
                sequence_length=x_trains[0].shape[1],
                num_classes=num_classes,
                vocab_size=len(word_embed_vecs),
                embedding_size=FLAGS.embedding_dim,
                dist_vocab_size=dist_vocab_size,
                dist_size=FLAGS.pos_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda,
                # word_vecs=word_embed_vecs,  # no longer passed; word_embed_vecs is of shape (2451510, 300)
                train_emb=FLAGS.train_emb)

            # Generate batches
            dev_f1s = []
            test_f1s = []
            for fi in range(len(x_trains)):
                sess.run(tf.global_variables_initializer())
                batches = data_helpers.batch_iter(
                    list(zip(x_trains[fi], train_e1_dists[fi], train_e2_dists[fi], y_trains[fi])),
                    FLAGS.batch_size, FLAGS.embedding_dim, FLAGS.num_epochs)
                # Training loop. For each batch...
                evaluate_every = len(x_trains[fi]) / FLAGS.batch_size
                for batch in batches:
                    x_batch, e1_dist, e2_dist, y_batch = zip(*batch)
                    # Train
                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.e1_dist: e1_dist,
                        cnn.e2_dist: e2_dist,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        cnn.wordvecs: word_embed_vecs
                    }
                    _, step, summaries, loss, accuracy = sess.run(
                        [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                        feed_dict)
text_cnn.py
class textCNN(object):
    """
    A CNN for text classification.
    Uses an embedding layer, followed by a convolutional, max-pooling and
    softmax layer.
    """
    def __init__(
            self, sequence_length, num_classes, vocab_size,
            embedding_size, dist_vocab_size, dist_size, filter_sizes, num_filters,
            l2_reg_lambda=0.0, train_emb=True):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.e1_dist = tf.placeholder(tf.int32, [None, sequence_length], name="e1_dist")
        self.e2_dist = tf.placeholder(tf.int32, [None, sequence_length], name="e2_dist")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.wordvecs = tf.placeholder(tf.float32, shape=(2451510, 300), name="wordvecs")

        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/gpu:0'), tf.name_scope("embedding"):
        # with tf.name_scope("embedding"):
            # self.W = tf.Variable(
            #     trainable=False,
            #     initial_value=tf.constant(word_vecs, dtype=tf.float32),
            #     name="W")
            self.WordVecs = tf.Variable(trainable=False,
                                        initial_value=self.wordvecs, name="WordVecs")
            self.embedded_chars = tf.nn.embedding_lookup(self.WordVecs, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
So basically I added a placeholder with the shape of my embedding matrix and passed the embeddings through feed_dict when calling sess.run for the training step. However, I am getting an InvalidArgumentError. The traceback details are below:
Traceback (most recent call last):
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1323, in _do_call
    return fn(*args)
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1302, in _run_fn
    status, run_metadata)
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 473, in __exit__
    c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'wordvecs' with dtype float and shape [2451510,300]
    [[Node: wordvecs = Placeholder[dtype=DT_FLOAT, shape=[2451510,300], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "train.py", line 470, in <module>
    tf.app.run()
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 48, in run
    _sys.exit(main(_sys.argv[:1] + flags_passthrough))
  File "train.py", line 466, in main
    train()
  File "train.py", line 407, in train
    sess.run(tf.global_variables_initializer())
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 889, in run
    run_metadata_ptr)
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1120, in _run
    feed_dict_tensor, options, run_metadata)
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1317, in _do_run
    options, run_metadata)
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1336, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'wordvecs' with dtype float and shape [2451510,300]
    [[Node: wordvecs = Placeholder[dtype=DT_FLOAT, shape=[2451510,300], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
Caused by op 'wordvecs', defined at:
  File "train.py", line 470, in <module>
    tf.app.run()
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 48, in run
    _sys.exit(main(_sys.argv[:1] + flags_passthrough))
  File "train.py", line 466, in main
    train()
  File "train.py", line 310, in train
    train_emb=FLAGS.train_emb)
  File "/home/mishra/Project/RelExtractKBP/text_cnn.py", line 20, in __init__
    self.wordvecs = tf.placeholder(tf.float32, shape=(2451510, 300), name="wordvecs")
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1599, in placeholder
    return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name)
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 3091, in _placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
    op_def=op_def)
  File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'wordvecs' with dtype float and shape [2451510,300]
    [[Node: wordvecs = Placeholder[dtype=DT_FLOAT, shape=[2451510,300], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
From what I understand, this error occurs when I do not feed a value for the placeholder in a sess.run call. However, I am not sure what I need to do about the sess.run(tf.global_variables_initializer()) call, which is what raises this error according to the traceback.
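For what it is worth, a stripped-down version of the same pattern (toy shapes instead of my real matrix, illustrative names) should, as far as I can tell, hit the same InvalidArgumentError, which makes me think the issue is only about feeding the placeholder at initialization time:

import numpy as np
import tensorflow as tf

word_vecs = np.random.rand(10, 4).astype(np.float32)  # toy stand-in for (2451510, 300)

wordvecs_ph = tf.placeholder(tf.float32, shape=word_vecs.shape, name="wordvecs")
word_vecs_var = tf.Variable(wordvecs_ph, trainable=False, name="WordVecs")

with tf.Session() as sess:
    # The variable's initializer reads from the placeholder, and nothing is fed
    # here, so this raises the same "You must feed a value for placeholder
    # tensor 'wordvecs'" error as in my traceback above.
    sess.run(tf.global_variables_initializer())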
Any pointers will really help. Thanks a lot.