I am struggling with tensorflow gradients. I am using a custom second order optimizer where I need to compute hessians. The library I have chosen uses tf.python.ops.gradients instead of the standard tape.gradients(…). Running the below script raises a value error (gradients are None):
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.optimizer_v2 import optimizer_v2
from tensorflow.python.keras import backend
from tensorflow.python.ops import gradients
from tensorflow.python.util import nest
class SecondOrderOptimizer(optimizer_v2.OptimizerV2):
_HAS_AGGREGATE_GRAD = True
def __init__(self, name='SecondOrderOpt', **kwargs):
super(SecondOrderOptimizer, self).__init__(name, **kwargs)
def get_gradients_hessian(self, loss, params):
params = nest.flatten(params)
with backend.get_graph().as_default(), backend.name_scope(self._name+"/gradients"):
grads = gradients.gradients(loss, params)
for grad, param in zip(grads, params):
if grad is None:
raise ValueError("Variable {} has `None` for gradient. ".format(param))
np.random.seed(42)
X_train = np.linspace(-1, 1, 100).reshape(-1, 1)
y_train = X_train**2 + np.random.normal(0, 0.05, (100, 1))
X_train = tf.Variable(X_train, dtype=tf.float32)
y_train = tf.Variable(y_train, dtype=tf.float32)
class SimpleNN(tf.keras.Model):
def __init__(self):
super(SimpleNN, self).__init__()
self.hidden = tf.keras.layers.Dense(10, activation='relu')
self.output_layer = tf.keras.layers.Dense(1, activation='linear')
def call(self, x):
x = self.hidden(x)
return self.output_layer(x)
model = SimpleNN()
loss_fn = tf.keras.losses.MeanSquaredError()
optimizer = SecondOrderOptimizer()
def train_step(model, X, y, optimizer):
with tf.GradientTape() as tape:
# loss = tf.reduce_sum((y - X**2)**2)
loss = loss_fn(y, model(X, training=True))
grads, Hessian = optimizer.get_gradients_hessian(loss, model.trainable_weights)
#optimizer.apply_gradients_hessian(zip(grads, Hessian, model.trainable_weights)) # omitted
return loss
for epoch in range(100):
loss = train_step(model, X_train, y_train, optimizer)
I am using python 3.10 with tensorflow 2.15.
This scripts eliminates the possibilities posted in tensorflow’s “Cases where gradient returns None” documentation. The original package creators are unresponsive.