I’m trying to convert the predicted probabilities from a logistic regression model into user-level scores, based on this article.
# Store the second column of predict_proba (the probability of logreg.classes_[1])
# as 'sub_primary', using every column except 'sub_primary' as model input.
# NOTE(review): if y_pred_df has no column other than 'sub_primary', this
# selection has zero columns; that would be consistent with the
# "at least one array or dtype is required" ValueError in the traceback
# quoted below -- confirm by inspecting y_pred_df.columns.
y_pred_df['sub_primary'] = logreg.predict_proba(y_pred_df.loc[:, [col for col in y_pred_df.columns if col != 'sub_primary']])[:, 1]
# Same computation for X_test_df.
X_test_df['sub_primary'] = logreg.predict_proba(X_test_df.loc[:, [col for col in X_test_df.columns if col != 'sub_primary']])[:, 1]
# Score = ln((1 - p) / p + 1e-10) * (pdo / ln 2) + one_to_one, where p is 'sub_primary'.
# NOTE(review): the probabilities above are written to y_pred_df / X_test_df,
# but the scores read 'sub_primary' from X_y_train / X_y_test -- confirm those
# frames actually carry that column.
# NOTE(review): 1e-10 is added to the odds (1 - p) / p, so it keeps the log
# finite when p == 1 but does not protect against p == 0.
X_y_train['Score'] = np.log((1 - X_y_train['sub_primary']) / X_y_train['sub_primary'] + 1e-10) * (pdo / np.log(2)) + one_to_one
X_y_test['Score'] = np.log((1 - X_y_test['sub_primary']) / X_y_test['sub_primary'] + 1e-10) * (pdo / np.log(2)) + one_to_one
I previously converted the NumPy arrays into pandas DataFrames; however, I keep getting the following warning and error:
/opt/anaconda3/lib/python3.12/site-packages/sklearn/base.py:493:
UserWarning: X does not have valid feature names, but
LogisticRegression was fitted with feature names warnings.warn(
File
/opt/anaconda3/lib/python3.12/site-packages/sklearn/linear_model/_base.py:366,
in LinearClassifierMixin._predict_proba_lr(self, X)
359 def _predict_proba_lr(self, X):
360 """Probability estimation for OvR logistic regression.
361
362 Positive class probabilities are computed as
363 1. / (1. + np.exp(-self.decision_function(X)));
364 multiclass is handled by normalizing that over all classes.
365 """
--> 366 prob = self.decision_function(X)
367 expit(prob, out=prob)
368 if prob.ndim == 1:
File
/opt/anaconda3/lib/python3.12/site-packages/sklearn/linear_model/_base.py:332,
in LinearClassifierMixin.decision_function(self, X)
329 check_is_fitted(self)
330 xp, _ = get_namespace(X)
--> 332 X = self._validate_data(X, accept_sparse="csr", reset=False)
333 scores = safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_
334 return xp.reshape(scores, (-1,)) if scores.shape[1] == 1 else scores
File /opt/anaconda3/lib/python3.12/site-packages/sklearn/base.py:633,
in BaseEstimator._validate_data(self, X, y, reset,
validate_separately, cast_to_ndarray, **check_params)
631 out = X, y
632 elif not no_val_X and no_val_y:
--> 633 out = check_array(X, input_name="X", **check_params)
634 elif no_val_X and not no_val_y:
635 out = _check_y(y, **check_params)
File
/opt/anaconda3/lib/python3.12/site-packages/sklearn/utils/validation.py:879,
in check_array(array, accept_sparse, accept_large_sparse, dtype,
order, copy, force_all_finite, ensure_2d, allow_nd,
ensure_min_samples, ensure_min_features, estimator, input_name)
875 pandas_requires_conversion = any(
876 _pandas_dtype_needs_early_conversion(i) for i in dtypes_orig
877 )
878 if all(isinstance(dtype_iter, np.dtype) for dtype_iter in dtypes_orig):
--> 879 dtype_orig = np.result_type(*dtypes_orig)
880 elif pandas_requires_conversion and any(d == object for d in dtypes_orig):
881 # Force object if any of the dtypes is an object
882 dtype_orig = object
ValueError: at least one array or dtype is required
I’m befuddled.