I'm getting this value error:ValueError: setting an array element with a sequence, when doing back_prop in tensorflow. I'm using large IMDB dataset and glove 50d pre-trained vectors. I have tried everything converting multi-dimensional list into np.array, converting individual lists into np.array and also did reshape operation x = x.reshape((batch,time_steps,embedding))
on x but it gave me value error ValueError: total size of new array must be unchanged. I think something is wrong with my input but don't know what? You could run this code on your pc by downloading IMDB dataset and 50d glove vectors. Please Help!
Traceback (most recent call last):
File "nlp.py", line 109, in <module>
sess.run(minimize_loss,feed_dict={X : x, Y : y})
File "/home/indy/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/home/indy/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 619, in _run
np_val = np.array(subfeed_val, dtype=subfeed_dtype)
ValueError: setting an array element with a sequence.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
import math
import os
from nltk.tokenize import TweetTokenizer
batch = 500
start = 0
end = batch - 1
learning_rate = 0.2
num_classes = 8
path = "/home/indy/Downloads/aclImdb/train/pos"
time_steps = 250
embedding = 50
def get_embedding():
gfile_path = os.path.join("/home/indy/Downloads/glove.6B", "glove.6B.50d.txt")
f = open(gfile_path,'r')
embeddings = {}
for line in f:
sp_value = line.split()
word = sp_value[0]
embedding = [float(value) for value in sp_value[1:]]
embeddings[word] = embedding
return embeddings
ebd = get_embedding()
def get_y(file_name):
y_value = file_name.split('_')
y_value = y_value[1].split('.')
return y_value[0]
def get_x(path,file_name):
file_path = os.path.join(path,file_name)
x_value = open(file_path,'r')
for line in x_value:
x_value = line.replace("<br /><br />","")
x_value = x_value.lower()
tokeniz = TweetTokenizer()
x_value = tokeniz.tokenize(x_value)
padding = 250 - len(x_value)
if padding > 0:
p_value = ['pad' for i in range(padding)]
x_value = np.concatenate((x_value,p_value))
x_value = [ebd['value'] for value in x_value]
return x_value
def batch_f(path):
directory = os.listdir(path)
y = [get_y(directory[i]) for i in range(len(directory))]
x = [get_x(path,directory[i]) for i in range(len(directory))]
return x,y
X = tf.placeholder(tf.float32, [batch,time_steps,embedding])
Y = tf.placeholder(tf.int32, [batch])
def build_nlp_model(x, _units, lstm_layers,num_classes):
x = tf.transpose(x, [1, 0, 2])
x = tf.reshape(x, [-1, embedding])
x = tf.split(0, time_steps, x)
lstm = tf.nn.rnn_cell.LSTMCell(num_units = _units, state_is_tuple = True)
multi_lstm = tf.nn.rnn_cell.MultiRNNCell([lstm] * lstm_layers, state_is_tuple = True)
outputs , state = tf.nn.rnn(multi_lstm,x, dtype = tf.float32)
weights = tf.Variable(tf.random_normal([_units,num_classes]))
biases = tf.Variable(tf.random_normal([num_classes]))
logits = tf.matmul(outputs[-1], weights) + biases
return logits
logits = build_nlp_model(X,400,4,num_classes)
c_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits,Y)
loss = tf.reduce_mean(c_loss)
decayed_learning_rate = tf.train.exponential_decay(learning_rate,0,10000,0.9)
optimizer= tf.train.AdamOptimizer(decayed_learning_rate)
minimize_loss = optimizer.minimize(loss)
correct_predict = tf.nn.in_top_k(logits, Y, 1)
accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
init = tf.initialize_all_variables()
with tf.Session() as sess:
sess.run(init)
for i in range(25):
x, y = batch_f(path)
sess.run(minimize_loss,feed_dict={X : x, Y : y})
accu = sess.run(accuracy,feed_dict = {X: x, Y: y})
cost = sess.run(loss,feed_dict = {X: x,Y: y})
start = end
end = (start + batch)
print ("Minibatch Loss = " + "{:.6f}".format(cost) + ", Training Accuracy= " + "{:.5f}".format(accu))
EDIT: The other error that I'm getting, when I run the code.
(500, 250, 50)
(500,)
Traceback (most recent call last):
File "nlp.py", line 115, in <module>
accu = sess.run(accuracy,feed_dict = {X: x, Y: y})
File "/home/indy/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 372, in run
run_metadata_ptr)
File "/home/indy/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 636, in _run
feed_dict_string, options, run_metadata)
File "/home/indy/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 708, in _do_run
target_list, options, run_metadata)
File "/home/indy/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 728, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.InvalidArgumentError: targets[0] is out of range
[[Node: InTopK = InTopK[T=DT_INT32, k=1, _device="/job:localhost/replica:0/task:0/cpu:0"](add, _recv_Placeholder_1_0)]]
Caused by op u'InTopK', defined at:
File "nlp.py", line 102, in <module>
correct_predict = tf.nn.in_top_k(logits, Y, 1)
File "/home/indy/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 890, in in_top_k
targets=targets, k=k, name=name)
File "/home/indy/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 704, in apply_op
op_def=op_def)
File "/home/indy/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2260, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/indy/tensorflow/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1230, in __init__
self._traceback = _extract_stack()