Need help with finding the reason why for loop outputs only 1s. When I delete the loop, it works just fine, outputting reasonable data, but within the loop, every row of created df is 1. Why is that?
def bootstrap(x, Nboot):
x = np.array(x)
models = []
resampled_stat = []
precision = []
recall = []
f1 = []
numb_rows = len(data)
for k in range(Nboot):
chosen_rows = np.random.choice(numb_rows, replace=True, size=numb_rows)
bootstrap_sample = data.iloc[chosen_rows]
resampled_stat.append(bootstrap_sample)
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.2, random_state = 1)
model = tree.DecisionTreeClassifier().fit(X_train, Y_train)
models.append(model)
y_pred = model.predict(X_test)
precision.append(metrics.precision_score(Y_test, y_pred, average="macro"))
recall.append(metrics.recall_score(Y_test, y_pred, average="macro"))
f1.append(metrics.f1_score(Y_test, y_pred, average="macro"))
pred_df = pd.DataFrame(
{
"Precision": precision,
"Recall": recall,
"F1": f1,
"Models": models,
})
return pred_df