I am running an ensemble of CatBoost, LightGBM, and XGBoost on Kaggle and Colab.
I cannot reproduce the same results when switching between CPU and GPU.
The code below is the LightGBM part.
import os
import random

import numpy as np
def seed_everything(seed=0):
    """Seed every RNG source used here (random, numpy, hash seed).

    Parameters
    ----------
    seed : int, default 0
        Seed applied to Python's `random`, numpy's global RNG, and the
        PYTHONHASHSEED environment variable.

    NOTE(review): this makes CPU runs repeatable, but it cannot make
    GPU training match CPU training — LightGBM's GPU build accumulates
    floating-point sums in a non-deterministic order, so CPU and GPU
    results can legitimately differ even with identical seeds.
    """
    random.seed(seed)
    # PYTHONHASHSEED only affects interpreters started *after* this point
    # (e.g. subprocesses); it does not change the current process.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Forces synchronous CUDA kernel launches — a debugging aid, not a
    # determinism guarantee.
    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    np.random.seed(seed)
# 10-fold CV: train LightGBM on the log-target, validate with RMSE on the
# original scale, and average the per-fold test predictions.
rmse_average = []
test_preds = []

seed_everything(seed=0)
kf = KFold(n_splits=10, shuffle=True, random_state=0)

for fold, (train_index, val_index) in enumerate(kf.split(X, y)):
    # One re-seed per fold is sufficient; the removed intra-fold calls
    # re-seeded a sequence nothing stochastic had advanced (no-ops).
    seed_everything(seed=0)
    print(50 * '-')
    print(f'Fold {fold + 1}')

    X_train, X_val = X.iloc[train_index], X.iloc[val_index]
    # Train on log(y); predictions are exponentiated back below.
    # Assumes y > 0 everywhere — TODO confirm.
    y_train, y_val = np.log(y.iloc[train_index]), np.log(y.iloc[val_index])

    model = LGBMRegressor(device='gpu', verbose=0, random_seed=0)
    model.fit(X_train, y_train, eval_set=[(X_val, y_val)], eval_metric='rmse')

    # Undo the log transform before scoring on the original scale.
    val_preds = np.exp(model.predict(X_val))
    rmse = np.sqrt(mean_squared_error(np.exp(y_val), val_preds))
    print("Validation RMSE:", rmse)
    rmse_average.append(rmse)

    # Collect this fold's test-set predictions (original scale).
    test_preds.append(np.exp(model.predict(test)))

# Fixed: original read f'nAverage...' — missing the backslash in '\n';
# `mean` was also undefined in the visible source, so use np.mean.
print(f'\nAverage RMSE: {np.mean(rmse_average)}')

# Average the fold predictions into the final submission.
test_preds_combined = np.mean(test_preds, axis=0)
sample_submission['pm2_5'] = test_preds_combined
sample_submission.to_csv('submission.csv', index=False)