In the example below I get different probabilities when I run
probs_test = pipe.predict_proba(x_test)[:, 1]
and when I convert the pipeline to an ONNX object and run
res = sess.run(None, inputs)
Why is this the case? And how can I get the same probabilities from the ONNX object?
Reproducible example:
import pandas as pd
import sklearn.metrics as skmet
import sklearn.linear_model as lm
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.datasets import load_iris
from skl2onnx import to_onnx
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as rt
# --- Data preparation -------------------------------------------------------
# Wrap the iris feature matrix in a DataFrame whose columns carry the
# "column_" prefix, then attach the label column.
data = load_iris()
x, y = data.data, data.target

target_column = "target"
features = [f"column_{idx}" for idx in range(4)]

x_train = pd.DataFrame(x).add_prefix("column_")
x_train[target_column] = y
# Relabel class 2 as class 0 (presumably leaving a binary 0/1 target).
x_train[target_column] = x_train[target_column].replace({2: 0})

# The "test" set is just a copy of the training frame.
x_test = x_train.copy()

# --- Model definition -------------------------------------------------------
model = lm.LogisticRegression(
    penalty="l1",
    solver="liblinear",
    C=0.08,
    fit_intercept=True,
    max_iter=1000000,
    random_state=2,
    verbose=0,
)

# Only the columns listed in `features` are passed through to the model, so
# the target column present in the frames is not forwarded as a feature.
column_transformer = ColumnTransformer(
    transformers=[("num", "passthrough", features)],
)

pipe = Pipeline(
    steps=[
        ("preprocessing", column_transformer),
        ("logistic_regression", model),
    ],
)

# --- Fit and score with scikit-learn ----------------------------------------
y_train = x_train[target_column].to_numpy()
y_test = x_test[target_column].to_numpy()

pipe.fit(X=x_train, y=y_train)

# Keep only column 1 of the predicted probability matrix.
probs_test = pipe.predict_proba(x_test)[:, 1]
auc_test = skmet.roc_auc_score(y_true=y_test, y_score=probs_test)
print(f"auc_test={auc_test}")

# --- Convert to ONNX and run with onnxruntime -------------------------------
# One float tensor input of shape [None, 1] is declared per feature column.
initial_types = [
    (name, FloatTensorType([None, 1]))
    for name in features
]
onx = to_onnx(pipe, initial_types=initial_types)

serialized_model = onx.SerializeToString()
sess = rt.InferenceSession(
    serialized_model,
    providers=["CPUExecutionProvider"],
)

# Each feature is fed as its own single-column array, cast to float32.
# NOTE(review): the scikit-learn pipeline above receives the DataFrame in its
# original dtype, so the two code paths do not see identically-typed input.
inputs = {
    name: x_test[[name]].to_numpy().astype("float32")
    for name in features
}
res = sess.run(None, inputs)

# res[1] is the second output of the session -- presumably the per-class
# probabilities; confirm against sess.get_outputs().
print(res[1][:2])
print(probs_test[:2])
New contributor
Alex Parker is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.