I am using the example from here.
The example works fine, but the moment I try to get more flexible with the custom metric class by using self inside it, I get hit by a UserWarning: Can't optimze method "evaluate" because self argument is used (the typo is in the warning itself).
Here is code to replicate the issue (to make the warning go away, comment out the self.foo = 5 line in the evaluate method of LoglossMetric):
<code>from catboost import CatBoostClassifier
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split


class LoglossMetric(object):
    def get_final_error(self, error, weight):
        return error / (weight + 1e-38)

    def is_max_optimal(self):
        # lower logloss is better
        return False

    def evaluate(self, approxes, target, weight):
        self.foo = 5  # <-- comment out this line and the warning disappears

        assert len(approxes) == 1
        assert len(target) == len(approxes[0])

        approx = approxes[0]
        error_sum = 0.0
        weight_sum = 0.0
        for i in range(len(approx)):
            # turn the raw score into a probability via the sigmoid
            e = np.exp(approx[i])
            p = e / (1 + e)
            w = 1.0 if weight is None else weight[i]
            weight_sum += w
            error_sum += -w * (target[i] * np.log(p) + (1 - target[i]) * np.log(1 - p))

        return error_sum, weight_sum


X, y = make_classification(n_classes=2, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

model2 = CatBoostClassifier(iterations=10, loss_function="Logloss", eval_metric=LoglossMetric(),
                            learning_rate=0.03, bootstrap_type='Bayesian', boost_from_average=False,
                            leaf_estimation_iterations=1, leaf_estimation_method='Gradient')
model2.fit(X_train, y_train, eval_set=(X_test, y_test))
</code>
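For reference, this is the warning-free variant I end up with (my own sketch; the subclass name is just mine): the body of evaluate is identical except that the self.foo = 5 assignment is gone, and with that change CatBoost trains silently.
<code># Same metric with the self usage removed; reuses LoglossMetric and np
# from the snippet above. Trains without the UserWarning.
class LoglossMetricNoSelf(LoglossMetric):
    def evaluate(self, approxes, target, weight):
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])

        approx = approxes[0]
        error_sum = 0.0
        weight_sum = 0.0
        for i in range(len(approx)):
            e = np.exp(approx[i])
            p = e / (1 + e)
            w = 1.0 if weight is None else weight[i]
            weight_sum += w
            error_sum += -w * (target[i] * np.log(p) + (1 - target[i]) * np.log(1 - p))

        return error_sum, weight_sum
</code>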
The same thing happens for any use of self inside evaluate, not just the assignment shown above.
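To isolate the trigger, here is a stripped-down sketch of my own (hypothetical class names, dummy metric values): the first variant gets no warning, while the second draws the same UserWarning merely because self appears in the body of evaluate.
<code># Minimal reduction (my own sketch, dummy return values):
class QuietMetric(object):
    def get_final_error(self, error, weight):
        return error
    def is_max_optimal(self):
        return False
    def evaluate(self, approxes, target, weight):
        # no reference to `self` in the body -> no warning
        return 0.0, 1.0

class NoisyMetric(QuietMetric):
    def evaluate(self, approxes, target, weight):
        # even just reading an attribute triggers the warning
        _ = getattr(self, "foo", None)
        return 0.0, 1.0
</code>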
- What is the optimization that CatBoost is failing to apply here, and why does using self prevent it?