Good afternoon, everyone.
I'm trying to load the best-performing model from each of three MLflow experiments.
Here is my complete code:
def load_model_mlflow(experiment_name, metric_to_sort_by, model_type):
    """Load the model logged by the best run of an MLflow experiment.

    The best run is the FINISHED run with the highest value of
    ``metric_to_sort_by``; its artifact at ``runs:/<run_id>/model`` is loaded
    with the MLflow flavor selected by ``model_type``.

    Args:
        experiment_name: Name of the MLflow experiment to search.
        metric_to_sort_by: Metric name without the ``metrics.`` prefix.
        model_type: Either ``'catboost'`` or ``'sklearn'``.

    Returns:
        The model object returned by the flavor's ``load_model``.

    Raises:
        ValueError: If ``model_type`` is unsupported, the experiment does not
            exist, the metric was never logged, or no FINISHED run has a
            value for the metric.
    """
    print(f"Start load '{experiment_name}' with best metric '{metric_to_sort_by}'")

    # Validate the flavor before querying the tracking server so that a typo
    # fails immediately instead of after a (possibly slow) run search.
    if model_type not in ('catboost', 'sklearn'):
        raise ValueError(
            f"Oops! '{model_type}' :That was no valid model type for "
            f"'{experiment_name}'. Try again..."
        )

    best_run_id = _get_best_run_id(experiment_name, metric_to_sort_by)
    model_uri = f"runs:/{best_run_id}/model"

    if model_type == 'catboost':
        return mlflow.catboost.load_model(model_uri)
    return mlflow.sklearn.load_model(model_uri)


def _get_best_run_id(experiment_name, metric_to_sort_by):
    """Return the run_id of the FINISHED run with the highest metric value."""
    experiment = mlflow.get_experiment_by_name(experiment_name)
    if experiment is None:
        raise ValueError(
            f"Oops! '{experiment_name}' :That was no valid experiment. Try again..."
        )

    runs_df = mlflow.search_runs([experiment.experiment_id])
    if runs_df.empty:
        raise ValueError(f"Oops! Experiment '{experiment_name}' has no runs. Try again...")

    metric_column = f"metrics.{metric_to_sort_by}"
    if metric_column not in runs_df.columns:
        raise ValueError(
            f"Oops! '{metric_to_sort_by}' :That was no valid metric for this model "
            f"'{experiment_name}'. Try again..."
        )

    finished_runs = runs_df[runs_df['status'] == 'FINISHED']
    # Only the ranking metric has to be present: dropping rows with ANY missing
    # param/metric would discard runs that simply logged a different set of
    # parameters, and could leave nothing to choose from.
    candidates = finished_runs.dropna(subset=[metric_column])
    if candidates.empty:
        raise ValueError(
            f"Oops! No FINISHED run of '{experiment_name}' has a value for "
            f"'{metric_to_sort_by}'. Try again..."
        )

    ranked = candidates.sort_values(metric_column, ascending=False)
    return ranked['run_id'].iloc[0]
# Load the best model per experiment; only the classifier call is enabled here.
# model_reg = load_model_mlflow(experiment_name="factoring_week_finance", metric_to_sort_by="training_score", model_type="sklearn")
model_clf = load_model_mlflow(
    experiment_name="factoring_client_predict_fact",
    metric_to_sort_by="f1_scores",
    model_type="catboost",
)
# model_debt = load_model_mlflow(experiment_name="factoring_debt_pay", metric_to_sort_by="R2_score", model_type="catboost")
With this code, the sklearn model loads successfully in about 20 seconds.
The two catboost models, however, start loading and then fail after about 1 minute with this error:
MlflowException: The following failures occurred while downloading one or more artifacts from https://…/api/2.0/mlflow-artifacts/artifacts/2/1c52ae2da0574d4fbc513ce29a9b77b0/artifacts:##### File model #####API request to https://…/api/2.0/mlflow-artifacts/artifacts/2/1c52ae2da0574d4fbc513ce29a9b77b0/artifacts/model failed with exception HTTPSConnectionPool(host=’…’, port=443): Max retries exceeded with url : /api/2.0/mlflow-artifacts/artifacts/2/1c52ae2da0574d4fbc513ce29a9b77b0/artifacts/model (Caused by ResponseError(‘too many 500 error responses’))
I tried setting the following environment variable:
# Raise the timeout to 600 seconds (10 minutes).
# NOTE(review): confirm MLflow actually reads this variable name; its documented
# HTTP request timeout setting appears to be MLFLOW_HTTP_REQUEST_TIMEOUT.
os.environ["MLFLOW_TRACKING_TIMEOUT"] = "600"
but it made no difference.