I am trying to use the losses from q1 and q2 to train my meta model, but I don't know why the gradients are always None.
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import initializers
from tensorflow.keras.optimizers import Adam

ini = initializers.GlorotNormal(seed = 1234) # Fixed seed for the Xavier (Glorot) normal initializer
# Determine hyperparameters
sn_train = 800 # Sampling number for training
sn_test = 200 # Sampling number for test
it = 0.0 # Initial time sample point
lt = 2.0*np.pi # last time sample point
epsilon = 1.0e-5 # You should determine it
epochs = 1000 # Epoch number
lr = 1.0e-2 # learning rate
batch_size = 32 # Define your batch size
# Create input samples for the other tasks in different manner with different intervals in between
def generate_data(it, lt, sn): # sampling for training
    x = np.random.uniform(it, lt, sn) # Sample input points for training the meta_model
    x = np.sort(x) # Sort the input samples
    y = np.cos(x) + 2 # A given nonlinear equation
    return x, y
# Generate data for training each task
s1_x, s1_y = generate_data(it, lt, sn_train) # for the 1st support task
s2_x, s2_y = generate_data(it, lt, sn_train) # for the 2nd support task
q1_x, q1_y = generate_data(it, lt, sn_train) # for the 1st query task
q2_x, q2_y = generate_data(it, lt, sn_train) # for the 2nd query task
meta_x, meta_y = generate_data(it, lt, sn_train) # for a meta-learning
meta_test_x = np.linspace(it, lt, sn_test) # for testing meta-model
meta_test_y = np.cos(meta_test_x) + 2 # A given nonlinear equation
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(32, activation = 'relu', input_shape = (1,)),
        tf.keras.layers.Dense(32, activation = 'relu'), # No explicit initializers on the hidden layers
        tf.keras.layers.Dense(1)])
    return model
meta_model = create_model()
# Initialize the meta_model using a specific initializer
for layer in meta_model.layers:
    if isinstance(layer, tf.keras.layers.Dense):
        layer.kernel.assign(ini(layer.kernel.shape)) # Re-initialize the kernel weights with the Glorot initializer
        if layer.use_bias:
            layer.bias.assign(ini(layer.bias.shape))
# Create the 1st support model
s1_model = create_model()
# Initialization using Transfer learning from the 'meta_model' to the 's1_model'
for i, layer in enumerate(s1_model.layers):
    if isinstance(layer, tf.keras.layers.Dense):
        weights, biases = meta_model.layers[i].get_weights() # Get weights and biases from the meta_model
        layer.set_weights([weights, biases]) # Set weights and biases in the s1_model
# Compile the s1_model
s1_model.compile(optimizer = Adam(learning_rate = lr), loss = 'mean_squared_error')
# Train the s1_model
s1_history = s1_model.fit(s1_x.reshape(-1, 1), s1_y, epochs = epochs, batch_size = batch_size, shuffle = True, verbose = 0)
# Print the final loss of the s1_model
s1_loss = s1_history.history['loss'][-1]
print(f'Final loss of the s1_model: {s1_loss}')
# Create the 2nd support model
s2_model = create_model()
# Initialization using Transfer learning from the 's1_model' to the 's2_model'
for i, layer in enumerate(s2_model.layers):
    if isinstance(layer, tf.keras.layers.Dense):
        weights, biases = s1_model.layers[i].get_weights() # Get weights and biases from the s1_model
        layer.set_weights([weights, biases]) # Set weights and biases in the s2_model
# Compile the s2_model
s2_model.compile(optimizer = Adam(learning_rate = lr), loss = 'mean_squared_error')
# Train the s2_model
s2_history = s2_model.fit(s2_x.reshape(-1, 1), s2_y, epochs = epochs, batch_size = batch_size, shuffle = True, verbose = 0)
# Print the final loss of the s2_model
s2_loss = s2_history.history['loss'][-1]
print(f'Final loss of the s2_model: {s2_loss}')
# Create the 1st query model
q1_model = create_model()
# Initialization using Transfer learning from the 's1_model' to the 'q1_model'
for i, layer in enumerate(q1_model.layers):
    if isinstance(layer, tf.keras.layers.Dense):
        weights, biases = s1_model.layers[i].get_weights() # Get weights and biases from the s1_model
        layer.set_weights([weights, biases]) # Set weights and biases in the q1_model
# Create the 2nd query model
q2_model = create_model()
# Initialization using Transfer learning from the 's2_model' to the 'q2_model'
for i, layer in enumerate(q2_model.layers):
    if isinstance(layer, tf.keras.layers.Dense):
        weights, biases = s2_model.layers[i].get_weights() # Get weights and biases from the s2_model
        layer.set_weights([weights, biases]) # Set weights and biases in the q2_model
# Compile the optimizer for meta_learning
optimizer = tf.keras.optimizers.Adam(learning_rate = lr)
loss_fn = tf.keras.losses.MeanSquaredError()
# Train the meta_model
meta_model.compile(optimizer = optimizer, loss = 'mean_squared_error')
epsilon = 4.8e-3
# if total_loss is larger than epsilon:
loss_fn = keras.losses.MeanSquaredError() # loss function
total_loss = 1
iter = 0
while(True):
    with tf.GradientTape() as tape:
        # Compute predictions and losses for the query tasks
        q1_prediction = q1_model(q1_x.reshape(-1, 1), training = False)
        q1_loss = loss_fn(q1_y.reshape(-1, 1), q1_prediction)
        q2_prediction = q2_model(q2_x.reshape(-1, 1), training = False)
        q2_loss = loss_fn(q2_y.reshape(-1, 1), q2_prediction)
        # Combine the losses to evaluate total_loss
        total_loss = q1_loss + q2_loss
        # print(f"Q1:{q1_loss} Q2:{q2_loss} Total:{total_loss}")
    gradients = tape.gradient(total_loss, meta_model.trainable_variables) # Evaluate the gradients of the meta_model parameters
    print(gradients)
    if gradients is None or all(g is None for g in gradients):
        print("No gradients computed.")
    else:
        optimizer.apply_gradients(zip(gradients, meta_model.trainable_variables)) # Update (optimize) the meta_model parameters using the gradients
    if total_loss.numpy() <= epsilon:
        print(f"Stopping early at iteration {iter+1} with Total Loss: {total_loss.numpy()}")
        break
    # Print iteration details
    iter = iter + 1
    print(f"Iteration: {iter}, Total Loss: {total_loss.numpy()}")
Before this, I got "ValueError: No gradients provided for any variable", so I added the code below to work around it, but as far as I can tell the gradients are always None:
if gradients is None or all(g is None for g in gradients):
    print("No gradients computed.")
else:
    optimizer.apply_gradients(zip(gradients, meta_model.trainable_variables)) # Update
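For reference, a stripped-down toy snippet with the same tape.gradient call pattern (illustrative variable names only, not my actual models) also comes back as None for a variable that the loss never uses:

import tensorflow as tf

a = tf.Variable(1.0)  # variable I ask the tape for gradients of
b = tf.Variable(2.0)  # variable the loss actually depends on

with tf.GradientTape() as tape:
    loss = b * b  # this loss never involves `a`

print(tape.gradient(loss, [a, b]))  # prints [None, <tf.Tensor: ... numpy=4.0>]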
I don't know why tape.gradient returns None for the gradients. If I missed something, please let me know. Thank you.