I’m trying to use mixed precision with a vanilla transformer on CPU, but I run into a problem during the gradient computation:
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
tensorflow.python.framework.errors_impl.InvalidArgumentError: Graph execution error:
Detected at node 'gradient_tape/model/iencoder/encoder/encoder_layer_1/conv1d_3/Conv1D/Conv2DBackpropFilter' defined at (most recent call last):
File "train_transformer.py", line 110, in <module>
itransformer.fit(data_handler.train,validation_data=data_handler.val,callbacks=[tensorboard_callback,early_stopping],epochs=50)
File "tf_py311/lib/python3.11/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "tf_py311/lib/python3.11/site-packages/keras/engine/training.py", line 1685, in fit
tmp_logs = self.train_function(iterator)
File "tf_py311/lib/python3.11/site-packages/keras/engine/training.py", line 1284, in train_function
return step_function(self, iterator)
File "tf_py311/lib/python3.11/site-packages/keras/engine/training.py", line 1268, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "tf_py311/lib/python3.11/site-packages/keras/engine/training.py", line 1249, in run_step
outputs = model.train_step(data)
File "tf_py311/lib/python3.11/site-packages/keras/engine/training.py", line 1054, in train_step
self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
File "tf_py311/lib/python3.11/site-packages/keras/optimizers/optimizer.py", line 542, in minimize
grads_and_vars = self.compute_gradients(loss, var_list, tape)
File "tf_py311/lib/python3.11/site-packages/keras/mixed_precision/loss_scale_optimizer.py", line 1249, in compute_gradients
grads_and_vars = self._optimizer.compute_gradients(
File "tf_py311/lib/python3.11/site-packages/keras/optimizers/optimizer.py", line 275, in compute_gradients
grads = tape.gradient(loss, var_list)
Node: 'gradient_tape/model/iencoder/encoder/encoder_layer_1/conv1d_3/Conv1D/Conv2DBackpropFilter'
Conv2DCustomBackpropFilterOp only supports NHWC.
[[{{node gradient_tape/model/iencoder/encoder/encoder_layer_1/conv1d_3/Conv1D/Conv2DBackpropFilter}}]] [Op:__inference_train_function_21068]
I have already cast everything I use (constants) to float16, but the problem persists. (Note: the error "Conv2DCustomBackpropFilterOp only supports NHWC" suggests the CPU kernel does not support the NCHW/channels-first data format — or, more likely here, that mixed precision on CPU is the issue, since the CPU Conv2D backprop kernels are not registered for float16.)