I am fitting a logistic regression with sklearn. From the fit, I calculate the midpoint of the S-curve (the x at which f(x) = 0.5, midpoint_test) and the x-values at which the curve reaches 0.16 and 0.84 (the 16th and 84th percentiles; see the dashed lines). I would like to know if there is a way to get the uncertainties associated with these values.
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
np.random.seed(42)  # for reproducibility

# Synthetic data: 100 x-values drawn from [0, 1000), labelled 1 when x > 500.
X = np.random.rand(100, 1) * 1000
X_test = np.linspace(0, 1000, 5000).reshape(-1, 1)  # dense grid used to draw the fitted curves
y = X > 500
y_int = y.astype(int).flatten()  # 0/1 labels as a 1-D vector

# --- Logistic regression on min-max scaled X ---
scaler_X = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
clf_scaled = LogisticRegression(C=1e3, fit_intercept=True)
clf_scaled.fit(X_scaled, y_int)
X_test_scaled = scaler_X.transform(X_test)
probabilities_scaled = clf_scaled.predict_proba(X_test_scaled)[:, 1]
# Map the probabilities back onto the label range (a no-op while labels are 0/1).
a_voir_scaled = probabilities_scaled * (np.max(y_int) - np.min(y_int)) + np.min(y_int)

# --- Logistic regression on raw X, for comparison in the plot ---
# NOTE(review): the solver may emit a ConvergenceWarning on unscaled inputs;
# raise max_iter if that happens.
clf_unscaled = LogisticRegression(C=1e3, fit_intercept=True)
clf_unscaled.fit(X, y_int)
probabilities_unscaled = clf_unscaled.predict_proba(X_test)[:, 1]
a_voir_unscaled = probabilities_unscaled * (np.max(y_int) - np.min(y_int)) + np.min(y_int)


def x_at_probability(p):
    """Return the x (in original units) where the scaled model predicts probability ``p``.

    Inverts p = 1 / (1 + exp(-(coef * x_scaled + intercept))) analytically,
    then undoes the min-max scaling so the result lives on the same axis as X.
    ``p`` must lie strictly between 0 and 1.
    """
    log_odds = np.log(p / (1.0 - p))  # logit(p); equals 0 at p = 0.5
    x_scaled = (log_odds - clf_scaled.intercept_[0]) / clf_scaled.coef_[0, 0]
    return float(scaler_X.inverse_transform([[x_scaled]])[0, 0])


y_val1 = 0.84  # <-- change here the upper probability level
y_val2 = 0.16  # <-- change here the lower probability level

midpoint_test = x_at_probability(0.5)  # centre of the S-curve
x1_val = x_at_probability(y_val1)  # x where the curve reaches y_val1
x2_val = x_at_probability(y_val2)  # x where the curve reaches y_val2

# Plot the original data and the logistic regression curves
plt.scatter(X, y_int, label='Original')
plt.plot(X_test, a_voir_scaled, label='scaled')
plt.plot(X_test, a_voir_unscaled, label='unscaled')
plt.vlines(x1_val, 0, 1, linestyle="dashed")
plt.vlines(x2_val, 0, 1, linestyle="dashed")
plt.vlines(midpoint_test, 0, 1, linestyle="dashed")
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
Thanks!