I am trying to implement pAUC above 80% true positive rate (TPR) for binary classification in TensorFlow. pAUC is often regarded as a better metric than AUC for assessing the performance of binary classifiers. In a previous post (How to implement calculating pAUC and its derivatives in python?), there was no answer.
I tried to approximate pAUC by calculating the values of TPR and FPR for eleven different thresholds 0.0, 0.1, …, 1.0 and calculating the area under the curve between TPR=0.8 and TPR=1.0. First, I calculated the usual AUC. Then I deducted the area falling between TPR=0 and TPR=0.8.
import tensorflow as tf
from sklearn.metrics import roc_auc_score
import numpy as np
import tensorflow_probability as tfp
# Toy example: ten samples with binary labels and evenly spaced scores in [0, 0.9].
yt = tf.constant([[0], [0], [1], [0], [1], [0], [0], [1], [1], [1]])
yp = tf.constant([[0.0], [0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8], [0.9]])
def coords(y_true, y_pred, th=0.0):
    """Return (TPR, FPR) of the hard-thresholded predictions as numpy scalars.

    Predictions >= `th` count as positive; a small epsilon guards the
    denominators against division by zero when a class is absent.
    NOTE: `.numpy()` forces eager evaluation, so the result is not a
    differentiable tensor.
    """
    eps = 1e-7
    labels = tf.cast(y_true, tf.float32)
    decisions = tf.cast(y_pred >= th, tf.float32)
    true_pos = tf.reduce_sum(decisions * labels)
    false_neg = tf.reduce_sum((1 - decisions) * labels)
    false_pos = tf.reduce_sum(decisions * (1 - labels))
    true_neg = tf.reduce_sum((1 - decisions) * (1 - labels))
    sensitivity = true_pos / (true_pos + false_neg + eps)
    fall_out = false_pos / (false_pos + true_neg + eps)
    return (sensitivity.numpy(), fall_out.numpy())
def soft_pAUC(y_true, y_pred, tpr_cap=0.8, thresholds=None):
    """Approximate the partial AUC above TPR = `tpr_cap`.

    The ROC curve is sampled at the given decision `thresholds`
    (defaults to 0.0, 0.1, ..., 1.0, matching the original hand-unrolled
    version). The full trapezoidal AUC is computed, then the area of the
    curve capped at `tpr_cap` is subtracted, leaving only the area that
    lies above the cap.

    Args:
        y_true: tensor of binary labels, shape (n, 1).
        y_pred: tensor of scores in [0, 1], same shape as `y_true`.
        tpr_cap: TPR level above which the partial area is measured
            (default 0.8, as in the original implementation).
        thresholds: optional iterable of decision thresholds; defaults
            to eleven evenly spaced values in [0, 1].

    Returns:
        Scalar tensor: pAUC above `tpr_cap`.

    NOTE(review): `coords` hard-thresholds the scores and calls
    `.numpy()`, so this quantity is piecewise constant and not usable as
    a differentiable loss as written.
    """
    if thresholds is None:
        thresholds = [i / 10.0 for i in range(11)]

    # ROC sample points, one (tpr, fpr) pair per threshold.
    points = [coords(y_true, y_pred, th) for th in thresholds]
    tprs = [p[0] for p in points]
    fprs = [p[1] for p in points]
    # Same curve with the TPR clipped at the cap; its area is the part
    # of the AUC that lies below tpr_cap.
    capped_tprs = [tf.math.minimum(t, tpr_cap) for t in tprs]

    def _trapezoid_area(ys, xs):
        # Sum |area| of each closed trapezoid between consecutive ROC
        # points and the x-axis, exactly as the unrolled original did.
        total = 0.0
        for i in range(len(ys) - 1):
            y0, y1 = ys[i], ys[i + 1]
            x0, x1 = xs[i], xs[i + 1]
            total = total + tf.math.abs(
                tfp.math.trapz([y0, y1, 0, 0], [x0, x1, x1, x0])
            )
        return total

    full_auc = _trapezoid_area(tprs, fprs)
    capped_auc = _trapezoid_area(capped_tprs, fprs)
    return full_auc - capped_auc
# Smoke-test the metric on the toy tensors defined above.
soft_pAUC(yt, yp)
I would like to know if:
soft_pAUC
is differentiable and could be used as a loss function by negating and minimizing soft_pAUC
could be improved in terms of accuracy and performance
Thanks in advance.