I'm trying to build a tf.data
pipeline, ultimately to compute skipgrams, but I get an error
NotImplementedError: Cannot convert a symbolic Tensor (cond/Identity:0) to a numpy array.
This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported
My pipeline:
text_vector_ds = (
text_ds
.batch(1024)
.map(vectorize_layer)
.map(my_func)
)
where
text_ds = tf.data.TextLineDataset(file)
vectorize_layer = tensorflow.keras.layers.experimental.preprocessing.TextVectorization(
standardize='lower_and_strip_punctuation',
max_tokens=4096,
output_mode='int',
output_sequence_length=5)
class MyFunc():
def _make_fat_diagonal(self, size: int) -> tf.Tensor:
fat_ones = tf.linalg.band_part(
tf.ones([size,size], dtype=tf.int64),
num_lower=self.window,
num_upper=self.window
)
return tf.linalg.set_diag(fat_ones, tf.zeros(size, dtype=tf.int64))
def __call__(self, input):
# Ensure the input is rank 2
if tf.rank(input) == 1:
input = tf.expand_dims(input, axis=0)
input_shape = tf.shape(input)
num_input_cols = input_shape[1]
return = self._make_fat_diagonal(num_input_cols)
my_func = MyFunc()
A partial stacktrace is
../testw2v/skipgram/skipgram.py:333 _make_fat_diagonal *
fat_ones = tf.linalg.band_part(
/opt/conda/envs/emb2/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:201 wrapper **
return target(*args, **kwargs)
/opt/conda/envs/emb2/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py:3120 ones
output = _constant_if_small(one, shape, dtype, name)
/opt/conda/envs/emb2/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py:2804 _constant_if_small
if np.prod(shape) < 1000:
<__array_function__ internals>:6 prod
/opt/conda/envs/emb2/lib/python3.7/site-packages/numpy/core/fromnumeric.py:3031 prod
keepdims=keepdims, initial=initial, where=where)
/opt/conda/envs/emb2/lib/python3.7/site-packages/numpy/core/fromnumeric.py:87 _wrapreduction
return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
/opt/conda/envs/emb2/lib/python3.7/site-packages/tensorflow/python/framework/ops.py:855 __array__
" a NumPy call, which is not supported".format(self.name))
NotImplementedError: Cannot convert a symbolic Tensor (cond/Identity:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported
I suppose that it doesn't like extracting a dimension to run _make_fat_diagonal()
, although I'm not sure how else I would express this. Outside of a pipeline the function works just fine on individual elements of the text_ds
dataset. As you can see I'm careful to only use Tensorflow methods.
What's the correct approach?