I am running the code below to get per-sample gradients in TensorFlow with a single tf.gradients() call. It works fine when I have just one set of variables:
import tensorflow as tf
import numpy as np

def f(x, W):
    return tf.matmul(x, W)

graph1 = tf.Graph()
with graph1.as_default():
    W = tf.get_variable('wd1', shape=(4, 2), initializer=tf.contrib.layers.xavier_initializer())
    X = tf.placeholder(tf.float32, [None, 4], name="X")
    a = tf.placeholder(tf.float32, [None, 2], name="a")
    split_x = tf.unstack(X, num=5, axis=0)
    q = [tf.expand_dims(s, 0) for s in split_x]
    split_a = tf.unstack(a, num=5, axis=0)
    weight_copies = [tf.identity(W) for _ in split_x]      # one copy of W per sample
    output = tf.stack([f(x, w) for (x, w) in zip(q, weight_copies)])
    cost_function = tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=split_a)
    grad_sample = tf.gradients(cost_function, weight_copies)
    init = tf.global_variables_initializer()

with tf.Session(graph=graph1) as sess:
    init.run()
    x_sample, a_sample = fetch_data()
    sample_gradient = sess.run(grad_sample, feed_dict={X: x_sample, a: a_sample})
When I get sample_gradient back, I get this:
[array([[ 0.18143043, -0.18143043],
[ 0.52393097, -0.52393097],
[ 0.00645372, -0.00645372],
[ 0.5445979 , -0.5445979 ]], dtype=float32),
array([[ 0.14291364, -0.14291364],
[ 0.20688654, -0.20688656],
[ 0.10543547, -0.10543548],
[ 0.22011465, -0.22011466]], dtype=float32),
array([[-0.7126652 , 0.7126652 ],
[-0.6779973 , 0.6779973 ],
[-0.166183 , 0.166183 ],
[-0.26456204, 0.26456204]], dtype=float32),
array([[ 0.18173395, -0.1817339 ],
[ 0.07825114, -0.07825112],
[ 0.01917152, -0.01917152],
[ 0.00939495, -0.00939495]], dtype=float32),
array([[-0.44272456, 0.44272456],
[-0.373744 , 0.373744 ],
[-0.6325189 , 0.6325189 ],
[-0.3546079 , 0.3546079 ]], dtype=float32),
array([[0.22570643, 0.33411032],
[0.18552302, 0.27462733],
[0.10510764, 0.15558948],
[0.26456347, 0.3916299 ]], dtype=float32),
array([[0.01360785, 0.00632451],
[0.42770416, 0.1987838 ],
[0.58186275, 0.27043203],
[0.6803998 , 0.31622902]], dtype=float32),
array([[0.20075822, 0.44605315],
[0.10817435, 0.24034637],
[0.12729922, 0.2828388 ],
[0.08586845, 0.19078615]], dtype=float32),
array([[0.20855448, 0.5140207 ],
[0.19167873, 0.4724273 ],
[0.13740788, 0.3386669 ],
[0.06688789, 0.16485746]], dtype=float32),
array([[0.25728938, 0.41753346],
[0.33400562, 0.5420298 ],
[0.27250844, 0.44223118],
[0.2579242 , 0.41856363]], dtype=float32)]
Which is correct. However, if I add a bias (or any other set of weights) and try to do the same thing, I get an error:
def f(x, W):
    return tf.add(tf.matmul(x, W[0]), W[1])

graph1 = tf.Graph()
with graph1.as_default():
    W = [tf.get_variable('wd1', shape=(4, 2), initializer=tf.contrib.layers.xavier_initializer()),
         tf.get_variable('bd1', shape=(2,), initializer=tf.contrib.layers.xavier_initializer())]
    X = tf.placeholder(tf.float32, [None, 4], name="X")
    a = tf.placeholder(tf.float32, [None, 2], name="a")
    logits = f(X, W)
    split_x = tf.unstack(X, num=5, axis=0)
    q = [tf.expand_dims(s, 0) for s in split_x]
    split_a = tf.unstack(a, num=5, axis=0)
    weight_copies = [[tf.identity(v) for v in W] for _ in split_x]  # one [W_copy, b_copy] pair per sample
    qqq = list(zip(*weight_copies))                                 # regrouped per variable: [(all W copies), (all b copies)]
    output = tf.stack([f(x, w) for (x, w) in zip(q, weight_copies)])
    cost_function = tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=split_a)
    grad_sample = tf.gradients(cost_function, qqq)
    init = tf.global_variables_initializer()

with tf.Session(graph=graph1) as sess:
    init.run()
    x_sample, a_sample = fetch_data()
    sample_gradient = sess.run(grad_sample, feed_dict={X: x_sample, a: a_sample})
TypeError: Fetch argument None has invalid type
I am sure the problem is in the line with the tf.gradients() call, but I do not know how to change its xs argument so that it stops returning None.
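One thing I considered (untested, and I have no idea whether it is valid) is to pass a flat list of tensors as xs instead of the nested qqq and regroup the results afterwards, roughly like this:

# Just a guess at what "changing the xs argument" could mean -- not sure it is correct:
# flatten the per-sample [W_copy, b_copy] pairs into one flat list of tensors,
# then regroup the returned gradients back into per-sample pairs.
flat_copies = [v for pair in weight_copies for v in pair]   # [W_0, b_0, W_1, b_1, ...]
flat_grads = tf.gradients(cost_function, flat_copies)
grad_sample = [flat_grads[i:i + 2] for i in range(0, len(flat_grads), 2)]

But I don't know whether this is the right approach or whether it even addresses the None.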
Does anyone have any idea how to fix this?
FYI:
def fetch_data():
    x_sample = np.random.rand(5, 4)
    a_sample = np.zeros((5, 2))
    a_sample[0, 1] = 1
    a_sample[1, 1] = 1
    a_sample[2, 0] = 1
    a_sample[3, 1] = 1
    a_sample[4, 0] = 1
    return x_sample, a_sample