I am trying to use the losses from q1 and q2 to train my meta model, but I don't know why the gradients are always None.
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import initializers
from tensorflow.keras.optimizers import Adam

ini = initializers.GlorotNormal(seed = 1234) # Fixed seed for the Xavier (Glorot) normal initializer
# Determine hyperparameters
sn_train = 800 # Sampling number for training
sn_test = 200 # Sampling number for test
it = 0.0 # Initial time sample point
lt = 2.0*np.pi # last time sample point
epsilon = 1.0e-5 # You should determine it
epochs = 1000 # Epoch number
lr = 1.0e-2 # learning rate
batch_size = 32 # Define your batch size
# Create input samples for the other tasks in different manner with different intervals in between
def generate_data(it, lt, sn): # sampling for training
    x = np.random.uniform(it, lt, sn) # Sample input points for training the meta_model
    x = np.sort(x) # Sort the input samples
    y = np.cos(x) + 2 # A given nonlinear equation
    return x, y
# Generate data for training each task
s1_x, s1_y = generate_data(it, lt, sn_train) # for the 1st support task
s2_x, s2_y = generate_data(it, lt, sn_train) # for the 2nd support task
q1_x, q1_y = generate_data(it, lt, sn_train) # for the 1st query task
q2_x, q2_y = generate_data(it, lt, sn_train) # for the 2nd query task
meta_x, meta_y = generate_data(it, lt, sn_train) # for a meta-learning
meta_test_x = np.linspace(it, lt, sn_test) # for testing meta-model
meta_test_y = np.cos(meta_test_x) + 2 # A given nonlinear equation
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(32, activation = 'relu', input_shape = (1,)),
        tf.keras.layers.Dense(32, activation = 'relu'), # No explicit initializers on the hidden layers
        tf.keras.layers.Dense(1)])
    return model
meta_model = create_model()
# Initialize the meta_model using a specific initializer
for layer in meta_model.layers:
    if isinstance(layer, tf.keras.layers.Dense):
        layer.kernel.assign(ini(layer.kernel.shape)) # Re-initialize the kernel weights with the Glorot initializer
        if layer.use_bias:
            layer.bias.assign(ini(layer.bias.shape))
# Create the 1st support model
s1_model = create_model()
# Initialization using Transfer learning from the 'meta_model' to the 's1_model'
for i, layer in enumerate(s1_model.layers):
    if isinstance(layer, tf.keras.layers.Dense):
        weights, biases = meta_model.layers[i].get_weights() # Get weights and biases from the meta_model
        layer.set_weights([weights, biases]) # Set weights and biases in the s1_model
# Compile the s1_model
s1_model.compile(optimizer = Adam(learning_rate = lr), loss = 'mean_squared_error')
# Train the s1_model
s1_history = s1_model.fit(s1_x.reshape(-1, 1), s1_y, epochs = epochs, batch_size = batch_size, shuffle = True, verbose = 0)
# Print the final loss of the s1_model
s1_loss = s1_history.history['loss'][-1]
print(f'Final loss of the s1_model: {s1_loss}')
# Create the 2nd support model
s2_model = create_model()
# Initialization using Transfer learning from the 's1_model' to the 's2_model'
for i, layer in enumerate(s2_model.layers):
    if isinstance(layer, tf.keras.layers.Dense):
        weights, biases = s1_model.layers[i].get_weights() # Get weights and biases from the s1_model
        layer.set_weights([weights, biases]) # Set weights and biases in the s2_model
# Compile the s2_model
s2_model.compile(optimizer = Adam(learning_rate = lr), loss = 'mean_squared_error')
# Train the s2_model
s2_history = s2_model.fit(s2_x.reshape(-1, 1), s2_y, epochs = epochs, batch_size = batch_size, shuffle = True, verbose = 0)
# Print the final loss of the s2_model
s2_loss = s2_history.history['loss'][-1]
print(f'Final loss of the s2_model: {s2_loss}')
# Create the 1st query model
q1_model = create_model()
# Initialization using Transfer learning from the 's1_model' to the 'q1_model'
for i, layer in enumerate(q1_model.layers):
    if isinstance(layer, tf.keras.layers.Dense):
        weights, biases = s1_model.layers[i].get_weights() # Get weights and biases from the s1_model
        layer.set_weights([weights, biases]) # Set weights and biases in the q1_model
# Create the 2nd query model
q2_model = create_model()
# Initialization using Transfer learning from the 's2_model' to the 'q2_model'
for i, layer in enumerate(q2_model.layers):
    if isinstance(layer, tf.keras.layers.Dense):
        weights, biases = s2_model.layers[i].get_weights() # Get weights and biases from the s2_model
        layer.set_weights([weights, biases]) # Set weights and biases in the q2_model
# Compile the optimizer for meta_learning
optimizer = tf.keras.optimizers.Adam(learning_rate = lr)
loss_fn = tf.keras.losses.MeanSquaredError()
# Train the meta_model
meta_model.compile(optimizer = optimizer, loss = 'mean_squared_error')
epsilon = 4.8e-3
# if total_loss is larger than epsilon:
loss_fn = keras.losses.MeanSquaredError() # loss function
total_loss = 1
iter = 0
while(True):
    with tf.GradientTape() as tape:
        # Compute predictions and losses for the query tasks
        q1_prediction = q1_model(q1_x.reshape(-1, 1), training = False)
        q1_loss = loss_fn(q1_y.reshape(-1, 1), q1_prediction)
        q2_prediction = q2_model(q2_x.reshape(-1, 1), training = False)
        q2_loss = loss_fn(q2_y.reshape(-1, 1), q2_prediction)
        # Combine the losses to evaluate total_loss
        total_loss = q1_loss + q2_loss
        # print(f"Q1:{q1_loss} Q2:{q2_loss} Total:{total_loss}")
    gradients = tape.gradient(total_loss, meta_model.trainable_variables) # Evaluate the gradients of the meta_model parameters
    print(gradients)
    if gradients is None or all(g is None for g in gradients):
        print("No gradients computed.")
    else:
        optimizer.apply_gradients(zip(gradients, meta_model.trainable_variables)) # Update (optimize) the meta_model parameters using the gradients
    if total_loss.numpy() <= epsilon:
        print(f"Stopping early at iteration {iter+1} with Total Loss: {total_loss.numpy()}")
        break
    # Print iteration details
    iter = iter + 1
    print(f"Iteration: {iter}, Total Loss: {total_loss.numpy()}")
Before this, I got "ValueError: No gradients provided for any variable", so I added the code below to work around it, but as far as I can tell the gradients are always None:
if gradients is None or all(g is None for g in gradients):
    print("No gradients computed.")
else:
    optimizer.apply_gradients(zip(gradients, meta_model.trainable_variables)) # Update
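For reference, a stripped-down toy snippet with the same tape.gradient call pattern (illustrative variable names only, not my actual models) also comes back as None for a variable that the loss never uses:

import tensorflow as tf

a = tf.Variable(1.0)  # variable I ask the tape for gradients of
b = tf.Variable(2.0)  # variable the loss actually depends on

with tf.GradientTape() as tape:
    loss = b * b  # this loss never involves `a`

print(tape.gradient(loss, [a, b]))  # prints [None, <tf.Tensor: ... numpy=4.0>]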
I don't know why tape.gradient returns None for the gradients. If I missed something, please let me know. Thank you.