I want to achieve the following curve by coding the two formulas to produce a smoothed y . This is an example from Hosmer et al.
My plot currently looks like this:
It kinda looks similiar but I am not sure what is wrong in the code. Did I put in the formulas wrong? Has anyone tried to replicate the ‘Scale_example’ by Hosmer?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Define the weight function
def weight_function(xi, xj, bandwidth):
return (1 - ((abs(xi - xj)) / bandwidth)** 3) ** 3
# Compute smoothed values
def compute_smoothed_values(x, y, bandwidth):
smoothed_values = []
n = len(x)
for i in range(n):
distances = np.abs(x - x[i])
sorted_indices = np.argsort(distances)
nearest_indices = sorted_indices[:int(bandwidth * n)]
weights = np.array([weight_function(x[i], x[j], distances[nearest_indices[-1]]) for j in nearest_indices])
weights /= weights.sum() # Normalize weights
smoothed_value = np.sum(weights * y[nearest_indices])
smoothed_values.append(smoothed_value)
return np.array(smoothed_values)
# Convert to log-odds scale
def log_odds(p):
return np.log(p / (1 - p))
def compute_log_odds(smoothed_values):
# Ensure smoothed values are within (0, 1) to avoid division by zero or log of zero
smoothed_values = np.clip(smoothed_values, 1e-10, 1 - 1e-10)
return log_odds(smoothed_values)
# Plot the results
def plot_smoothed_values(x, log_odds_values):
plt.scatter(x, log_odds_values, label='Log-Odds Smoothed Values')
plt.xlabel('Covariate x')
plt.ylabel('Log-Odds')
plt.title('Smoothed Values on Log-Odds Scale')
plt.legend()
plt.show()
# Example usage
if __name__ == "__main__":
# Example data
x=np.array ([27.88814,26.77344,26.32696,27.44851,27.4943,31.0162,23.48546,25.5608,24.95539,24.18913,26.3262,21.17183,20.38803,33.92283,33.23304,32.58929,33.07627,34.78639,38.48568,34.39309,37.79305,34.86726,34.10209,35.85667,36.83823,40.82987,33.62194,39.42726,38.7647,33.00929,41.59631,50.42802,47.10275,49.99162,46.46074,48.68956,44.47505,52.31591,48.81218,47.22429,54.81986,55.08204,55.07721,48.85988,60.05986,60.10145,60.53315,68.47058,65.67481,66.14304,63.50969,64.96313,68.9956,65.90376,56.88092,66.27928])
y=np.array ([0,1,1,0,0,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0])
bandwidth = 0.8 # Example bandwidth value
# Compute smoothed values
smoothed_values = compute_smoothed_values(x, y, bandwidth)
# Convert to log-odds scale
log_odds_values = compute_log_odds(smoothed_values)
# Plot the results
plot_smoothed_values(x, log_odds_values)