I would like to use a sequence of documents to predict a target label:
['some text here', 'some more text here'] --> label
Initially my text sequences have a fixed length; I want to get that case working before moving on to padded, variable-length sequences. The architecture is as follows:
Input -> HubLayer -> LSTM -> Dense
The following code starts running and then fails:
# TF Hub handle of the pre-trained text-embedding module.
hub_model = 'https://tfhub.dev/google/nnlm-en-dim50/2'

# Wrap the module as a frozen Keras layer that takes scalar strings
# (input_shape=() means one string per example).
hub_layer = hub.KerasLayer(
    hub_model,
    input_shape=(),
    dtype='string',
    trainable=False,
)
def build_model():
    """Build a classifier over fixed-length sequences of documents.

    Pipeline: string input -> hub embedding of each document -> LSTM over
    the per-document embeddings -> sigmoid Dense output.

    Relies on the module-level ``hub_layer`` and on ``y`` (its second
    dimension sets the number of output units).

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    # NOTE(review): the input is declared with shape=() although each example
    # appears to hold two documents; shape=(2,) may be the accurate
    # declaration -- confirm against the dataset.
    inputs = tf.keras.Input(shape=(), dtype='string')

    # The hub layer expects a 1-D batch of strings, so flatten every document
    # of every example into a single axis before embedding.
    inputs_1d = tf.reshape(inputs, [-1])
    x = hub_layer(inputs_1d)

    # Regroup the flat embeddings into (batch, 2 documents, 50 features).
    # The batch dimension is inferred with -1 rather than hard-coded to
    # BATCH_SIZE, so a batch of any size (e.g. a short final batch) keeps
    # predictions aligned with the labels.
    x = tf.reshape(x, [-1, 2, 50])

    x = tf.keras.layers.LSTM(32, activation='relu')(x)
    outputs = tf.keras.layers.Dense(y.shape[1], activation='sigmoid')(x)
    return tf.keras.Model(inputs, outputs)
I believe the problem lies in how I pass a sequence of documents to the Keras hub layer.
Error:
2021-11-02 19:34:34.360697: W tensorflow/core/framework/op_kernel.cc:1680] Invalid argument: required broadcastable shapes
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/tmp/ipykernel_9371/20784351.py in <module>
----> 1 history = model.fit(train, epochs=2, validation_data=test)
/opt/conda/lib/python3.7/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1182 _r=1):
1183 callbacks.on_train_batch_begin(step)
-> 1184 tmp_logs = self.train_function(iterator)
1185 if data_handler.should_sync:
1186 context.async_wait()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
883
884 with OptionalXlaContext(self._jit_compile):
--> 885 result = self._call(*args, **kwds)
886
887 new_tracing_count = self.experimental_get_tracing_count()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
915 # In this case we have created variables on the first call, so we run the
916 # defunned version which is guaranteed to never create variables.
--> 917 return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
918 elif self._stateful_fn is not None:
919 # Release the lock early so that multiple threads can perform the call
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
3038 filtered_flat_args) = self._maybe_define_function(args, kwargs)
3039 return graph_function._call_flat(
-> 3040 filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access
3041
3042 @property
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1962 # No tape is watching; skip to running the function.
1963 return self._build_call_outputs(self._inference_function.call(
-> 1964 ctx, args, cancellation_manager=cancellation_manager))
1965 forward_backward = self._select_forward_and_backward_functions(
1966 args,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in call(self, ctx, args, cancellation_manager)
594 inputs=args,
595 attrs=attrs,
--> 596 ctx=ctx)
597 else:
598 outputs = execute.execute_with_cancellation(
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: required broadcastable shapes
[[node gradient_tape/binary_crossentropy/logistic_loss/mul/Mul (defined at tmp/ipykernel_9371/484917154.py:1) ]]
(1) Invalid argument: required broadcastable shapes
[[node gradient_tape/binary_crossentropy/logistic_loss/mul/Mul (defined at tmp/ipykernel_9371/484917154.py:1) ]]
[[model_1/keras_layer_1/StatefulPartitionedCall/StatefulPartitionedCall/StatefulPartitionedCall/tokenize/StringSplit/StringSplit/_23]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_4634]
Function call stack:
train_function -> train_function