The loss function works as intended, but I’m not getting any gradients computed. batter_cdfs is just a 2150×10 array of floats between 0 and 1.
The loss function is supposed to, in essence, return the log-prob of a single observation from a joint distribution that the model would generate.
def custom_loss(y_true, y_pred):
y_true_expanded = tf.expand_dims(y_true, axis=1)
y_true_expanded = tf.tile(y_true_expanded, [1, tf.shape(y_pred)[1], 1])
mask = tf.less(y_pred, y_true_expanded)
tf.print("mask:", mask)
row_wise_comparison = tf.reduce_mean(tf.cast(mask, tf.float32), axis=2)
result = tf.reduce_mean(row_wise_comparison, axis=1)
log_result = tf.math.log(result + 1e-9) # Adding a small value to avoid log(0)
return -tf.reduce_mean(log_result)
cdf_model = tf.keras.Sequential([
tf.keras.layers.Dense(64, activation = 'LeakyReLU'),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.Dropout(0.1),
tf.keras.layers.Dense(4500, activation='sigmoid'),
tf.keras.layers.Reshape((500,9))
])
cdf_model.compile(optimizer=tf.optimizers.Nadam(), loss=custom_loss)
input_shape = (100,)
y_train = np.array(batter_cdfs.iloc[:,0:9])
x_train = np.array(batter_cdfs.iloc[:,9:])
cdf_model.fit(x_train, y_train, epochs=15,validation_split = 0.2, verbose= True, shuffle=True, batch_size=30)