Not long ago I trained the models, got the results, and everything was fine. Now I needed to rerun the project and it stopped working for me; specifically, CatBoost raises an error.
# The 'type_answer' column is the target; every other column is a feature.
y = df_train['type_answer']
X = df_train.drop(columns=['type_answer'])

# Hold out 15% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, stratify=y, random_state=42
)

# Encode the object-dtype columns with a target encoder. The encoder is fitted
# on the training part (features + target) and then applied unchanged to the
# test part.
# NOTE(review): TargetEncoder is presumably category_encoders.TargetEncoder
# (it takes a `cols` argument) -- confirm against the imports.
enc = TargetEncoder(cols=X.select_dtypes(include='object').columns)
X_train_encoded = enc.fit_transform(X_train, y_train)
X_test_encoded = enc.transform(X_test)
# Sum of the training labels, computed once and reused for XGBoost below.
# NOTE(review): treating this as "number of positives" assumes y_train holds
# 0/1 labels -- confirm against the data.
n_positive = sum(y_train)

# Candidate classifiers keyed by display name. Each one is configured to
# compensate for class imbalance through its own library-specific option,
# and all share the same random seed.
models = {
    'Random Forest': RandomForestClassifier(
        class_weight='balanced',
        random_state=42,
    ),
    'XGBoost': XGBClassifier(
        # (total - label sum) / label sum, i.e. negatives per positive
        # under the 0/1-label assumption noted above.
        scale_pos_weight=(len(y_train) - n_positive) / n_positive,
        random_state=42,
    ),
    'CatBoost': CatBoostClassifier(
        auto_class_weights='Balanced',
        custom_metric=['Precision'],
        verbose=0,
        random_state=42,
    ),
    'LightGBM': LGBMClassifier(
        class_weight='balanced',
        random_state=42,
        verbose=-1,
    ),
}
# Fit every candidate on the encoded training data and report its Gini score
# on the held-out part, as a percentage rounded to two decimals.
for model_name, model in models.items():
    model.fit(X_train_encoded, y_train)

    # Keep only the second probability column (index 1) as the score.
    # NOTE(review): presumably the positive-class probability of a binary
    # problem -- confirm the class order.
    test_proba = model.predict_proba(X_test_encoded)
    y_pred = test_proba[:, 1]

    # gini_score is defined outside this snippet; it receives the true labels
    # and the predicted scores.
    gini = gini_score(y_test, y_pred)
    print(f'{model_name}: Gini test = {round(100 * gini, 2)}%')
---------------------------------------------------------------------------CatBoostError Traceback (most recent call last)
Cell In[78], line 22 2 models = { 3 'Random Forest': RandomForestClassifier( 4 class_weight='balanced', random_state=42 (...) 17 ) 18 } 20 for model_name, model in models.items(): 21 # try:---> 22 model.fit(X_train_encoded, y_train) 23 y_pred = model.predict_proba(X_test_encoded)[:, 1] 24 gini = gini_score(y_test, y_pred)
File ~AppDataLocalProgramsPythonPython310libsite-packagescatboostcore.py:5220, in CatBoostClassifier.fit(self, X, y, cat_features, text_features, embedding_features, sample_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr) 5217 if 'loss_function' in params: 5218 CatBoostClassifier._check_is_compatible_loss(params['loss_function'])-> 5220 self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline, use_best_model, 5221 eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, 5222 silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr) 5223 return self
File ~AppDataLocalProgramsPythonPython310libsite-packagescatboostcore.py:2400, in CatBoost._fit(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr) 2397 allow_clear_pool = train_params["allow_clear_pool"] 2399 with plot_wrapper(plot, plot_file, 'Training plots', [_get_train_dir(self.get_params())]):-> 2400 self._train( 2401 train_pool, 2402 train_params["eval_sets"], 2403 params, 2404 allow_clear_pool, 2405 train_params["init_model"] 2406 ) 2408 # Have property feature_importance possibly set 2409 loss = self._object._get_loss_function_name()
File ~AppDataLocalProgramsPythonPython310libsite-packagescatboostcore.py:1780, in _CatBoostBase._train(self, train_pool, test_pool, params, allow_clear_pool, init_model) 1779 def _train(self, train_pool, test_pool, params, allow_clear_pool, init_model):-> 1780 self._object._train(train_pool, test_pool, params, allow_clear_pool, init_model._object if init_model else None) 1781 self._set_trained_model_attributes()
File _catboost.pyx:4833, in _catboost._CatBoost._train()
File _catboost.pyx:4882, in _catboost._CatBoost._train()
CatBoostError:
What I have tried so far:
- reinstalling catboost
- deleting the old environment and creating a new one
- training CatBoost separately, passing the data via a Pool
I would greatly appreciate help understanding the problem, because the project was run many times before without any issues.
New contributor
Yuriy is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.