I am trying to make a model using xgboost, especially with early stopping in the part of cross validation.
And im trying to compare multiples of models, xgboost and the others.
But facing with an error.
The version of xgboost on my local is 2.1.0.
Here’s the codes.
def cross_validate_score(model, data: pd.DataFrame, cv=None, test_data: pd.DataFrame = None, label: str = 'Response', include_original: bool = True, original_data: pd.DataFrame = None):
if cv is None:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
if test_data is None:
raise ValueError("test_data must be provided")
X = data.copy()
y = X.pop(label)
val_predictions = np.zeros(len(X))
test_predictions = np.zeros(len(test_data))
train_scores, val_scores = [], []
for fold, (train_idx, val_idx) in enumerate(cv.split(X, y)):
X_train = X.iloc[train_idx].reset_index(drop=True)
y_train = y.iloc[train_idx].reset_index(drop=True)
X_val = X.iloc[val_idx].reset_index(drop=True)
y_val = y.iloc[val_idx].reset_index(drop=True)
if include_original:
if original_data is None:
raise ValueError("original_data must be provided when include_original is True")
X_train = pd.concat([original_data.drop(label, axis=1), X_train]).reset_index(drop=True)
y_train = pd.concat([original_data[label], y_train]).reset_index(drop=True)
model_cloned = clone(model)
if isinstance(model_cloned, XGBClassifier):
eval_set = [(X_val, y_val)]
model_cloned.fit(X_train, y_train, eval_set=eval_set, early_stopping_rounds=50, verbose=False) # I think this is the main reason of the error.
best_iteration = model_cloned.best_iteration
train_preds_proba = model_cloned.predict_proba(X_train, iteration_range=(0, best_iteration))[:, 1]
val_preds_proba = model_cloned.predict_proba(X_val, iteration_range=(0, best_iteration))[:, 1]
test_preds_proba = model_cloned.predict_proba(test_data, iteration_range=(0, best_iteration))[:, 1]
elif isinstance(model_cloned, LGBMClassifier):
eval_set = [(X_val, y_val)]
model_cloned.fit(X_train, y_train, eval_set=eval_set, eval_metric='auc', callbacks=[early_stopping(50)])
best_iteration = model_cloned.best_iteration_
train_preds_proba = model_cloned.predict_proba(X_train, num_iteration=best_iteration)[:, 1]
val_preds_proba = model_cloned.predict_proba(X_val, num_iteration=best_iteration)[:, 1]
test_preds_proba = model_cloned.predict_proba(test_data, num_iteration=best_iteration)[:, 1]
else:
raise ValueError("Model type not supported for early stopping.")
val_predictions[val_idx] = val_preds_proba
train_scores.append(roc_auc_score(y_train, train_preds_proba))
val_scores.append(roc_auc_score(y_val, val_preds_proba))
print(f'Fold {fold}: {val_scores[-1]:.5f}')
test_predictions += test_preds_proba / cv.get_n_splits()
print(f'Val Score: {np.mean(val_scores):.7f} ± {np.std(val_scores):.7f} | Train Score: {np.mean(train_scores):.7f} ± {np.std(train_scores):.7f} | {label}')
return val_scores, val_predictions, test_predictions
cv_summary, oof_predictions, test_predictions = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
xgb_params = {
'n_estimators': 10000,
'eta': 0.05,
'alpha': 0.2545607592482198,
'subsample': 0.8388163485383147,
'colsample_bytree': 0.2732499701466825,
'max_depth': 16,
'min_child_weight': 5,
'gamma': 0.0017688666476104672,
'eval_metric': 'auc',
'max_bin': 262143, #a weird max_bin, for reference: https://www.kaggle.com/competitions/playground-series-s4e7/discussion/516265
'tree_method': 'gpu_hist',
}
xgb_tuned = XGBClassifier(**xgb_params, random_state=42)
%%time
cv_summary['xgb'], oof_predictions['xgb'], test_predictions['xgb'] = cross_validate_score(xgb_tuned, data=train_df, test_data=test_df, include_original=False)
And the error is like this.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File <timed exec>:1
Cell In[58], line 35
33 if isinstance(model_cloned, XGBClassifier):
34 eval_set = [(X_val, y_val)]
---> 35 model_cloned.fit(X_train, y_train, eval_set=eval_set, early_stopping_rounds=50, verbose=False) # 조기 종료 설정
36 best_iteration = model_cloned.best_iteration
37 train_preds_proba = model_cloned.predict_proba(X_train, iteration_range=(0, best_iteration))[:, 1]
File c:Usersdxschoolanaconda3Libsite-packagesxgboostcore.py:726, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
724 for k, arg in zip(sig.parameters, args):
725 kwargs[k] = arg
--> 726 return func(**kwargs)
TypeError: XGBClassifier.fit() got an unexpected keyword argument 'early_stopping_rounds'
Is there anybody who had faced with this problem, exactly same to mine?
I really need you guy’s help!!!