I'm trying to fine-tune a BERT model on Google Colaboratory with TPUs, but I always get the following error:
ValueError: Variable (<tf.Variable 'bert_layer_module/bert/encoder/layer_10/attention/output/LayerNorm/beta:0' shape=(768,) dtype=float32>) was not created in the distribution strategy scope of (<tensorflow.python.distribute.tpu_strategy.TPUStrategyV1 object at 0x7f6a1fad3390>). It is most likely due to not all layers or the model or optimizer being created outside the distribution strategy scope. Try to make sure your code looks similar to the following.
with strategy.scope():
  model = _create_model()
  model.compile(...)
My code is based on this notebook. I adapted it to my specific problem and tried to run it on a TPU.
I have a custom layer BertLayer, which is apparently the thing created outside the scope (the imports and bert_path it relies on are shown after the class):
class BertLayer(tf.keras.layers.Layer):
    def __init__(self, n_fine_tune_layers=10, **kwargs):
        self.n_fine_tune_layers = n_fine_tune_layers
        self.trainable = True
        self.output_size = 768
        super(BertLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.bert = hub.Module(
            bert_path,
            trainable=self.trainable,
            name="{}_module".format(self.name)
        )
        trainable_vars = self.bert.variables

        # Remove unused layers
        trainable_vars = [var for var in trainable_vars if not "/cls/" in var.name]

        # Select how many layers to fine tune
        trainable_vars = trainable_vars[-self.n_fine_tune_layers :]

        # Add to trainable weights
        for var in trainable_vars:
            self._trainable_weights.append(var)

        # Add non-trainable weights
        for var in self.bert.variables:
            if var not in self._trainable_weights:
                self._non_trainable_weights.append(var)

        super(BertLayer, self).build(input_shape)

    def call(self, inputs):
        inputs = [K.cast(x, dtype="int32") for x in inputs]
        input_ids, input_mask, segment_ids = inputs
        bert_inputs = dict(
            input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids
        )
        result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
            "pooled_output"
        ]
        return result

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_size)
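For reference, the imports and bert_path the layer relies on are defined earlier in my notebook, roughly like this (the Hub URL below is taken from the notebook I started from, so treat it as a placeholder for whatever module you load):

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import backend as K

# Placeholder: uncased base BERT module from TF Hub (pooled output size 768);
# this exact URL is an assumption, substitute your own module path if needed.
bert_path = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"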
The model is created here:
def build_model(max_seq_length):
    output_classes = train_labels[0].shape

    # Build model
    in_id = tf.keras.layers.Input(shape=(max_seq_length,), name="input_ids")
    in_mask = tf.keras.layers.Input(shape=(max_seq_length,), name="input_masks")
    in_segment = tf.keras.layers.Input(shape=(max_seq_length,), name="segment_ids")
    bert_inputs = [in_id, in_mask, in_segment]

    # Instantiate the custom Bert Layer defined above
    bert_output = BertLayer(n_fine_tune_layers=10)(bert_inputs)

    # Build the rest of the classifier
    dense = tf.keras.layers.Dense(256, activation='relu')(bert_output)
    pred = tf.keras.layers.Dense(train_labels.shape[1], activation='sigmoid')(dense)

    model = tf.keras.models.Model(inputs=bert_inputs, outputs=pred)
    return model
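(train_labels is prepared earlier; as far as build_model is concerned it only needs to be a 2-D multi-hot array. A purely hypothetical stand-in would be:)

import numpy as np

# Hypothetical stand-in: 1000 examples, 5 labels per example (multi-hot);
# only train_labels.shape[1] matters for the output layer above.
train_labels = np.random.randint(0, 2, size=(1000, 5)).astype("float32")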
The error occurs when calling model.compile:
strategy = tf.distribute.experimental.TPUStrategy(
    tf.contrib.cluster_resolver.TPUClusterResolver(TPU_ADDRESS))

with strategy.scope():
    model = build_model(256)
    opt = tf.train.AdamOptimizer(0.001)
    model.compile(loss='binary_crossentropy', optimizer=opt)
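TPU_ADDRESS comes from the Colab runtime; a minimal sketch of the standard setup, in case it matters:

import os

# Standard Colab TPU runtime address (assuming a Colab TPU runtime is attached).
TPU_ADDRESS = "grpc://" + os.environ["COLAB_TPU_ADDR"]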
As I understand it, the BertLayer is indeed created within the scope, but I'm relatively new to Keras and TensorFlow, so I'd appreciate any help. I'm working with TensorFlow 1.14.
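If it helps, here is a minimal sketch of how I would check which variables Keras considers out of scope; as far as I can tell, variable_created_in_scope is the same check that model.compile runs before raising the error above:

with strategy.scope():
    model = build_model(256)
    # Print every weight the strategy does not consider created in its scope
    # (these are the ones that trigger the ValueError in model.compile).
    for v in model.weights:
        if not strategy.extended.variable_created_in_scope(v):
            print("created outside scope:", v.name)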