I am using the caret::train() function to train a custom model with the bnlearn library.
The grid-search is defined over different algorithms that are implemented in bnlearn.
I get these warnings:
"model fit failed for Fold1: algorithm=hc Error in check.nodes(name, x) : invalid node(s) 'xNames'.
"
Warning message:
"model fit failed for Fold1: algorithm=tabu Error in check.nodes(name, x) : invalid node(s) 'xNames'.
"
The dataset used here is the UCI adult dataset and could be reproduced with
install.packages("mlr3fairness")
library("mlr3")
data("adult_test", package = "mlr3fairness")
adult_test <- adult_test %>% rename(income = target)
This is the part of my prediction function where I use the train function:
model <- caret::train(income ~ .,
data = outer_trainData,
method = bn_model,
tuneGrid = tunegrid,
trControl = inner_control)
Here is my custom model:
# Define the custom model list
bn_model <- list(
label = "Bayesian Network",
library = "bnlearn",
type = "Classification",
parameters = data.frame(
parameter = c("algorithm"),
class = c("character"),
label = c("Algorithm")
),
grid = function(x, y, len = NULL, search = "grid") {
algorithms <- c("hc", "tabu", "gs", "iamb")
if (search == "grid") {
expand.grid(algorithm = algorithms)
} else {
data.frame(algorithm = sample(algorithms, len, replace = TRUE))
}
},
fit = function(x, y, wts, param, lev, last, classProbs, ...) {
df <- as.data.frame(x)
df$income <- y
# Ensure consistent factor levels across folds
df <- lapply(df, function(col) {
if (is.factor(col)) {
levels(col) <- union(levels(col), unique(col))
}
return(col)
})
df <- as.data.frame(df)
tryCatch({
# Train the Bayesian network using the specified algorithm
bn <- train_bn(df, param$algorithm)
# Fit the parameters of the Bayesian network
bn_fitted <- bn.fit(bn, df)
return(bn_fitted)
}, error = function(e) {
print(paste("Error in fitting model:", e$message))
return(NULL)
})
},
predict = function(modelFit, newdata, preProc = NULL, submodels = NULL) {
if (is.null(modelFit)) {
return(rep(NA, nrow(newdata)))
}
data <- discretize_df(newdata)
predictions <- tryCatch({
predict(modelFit, data)
}, error = function(e) {
print(paste("Error in prediction: ", e$message))
return(rep(NA, nrow(newdata)))
})
return(predictions)
},
prob = NULL,
predictors = function(x, ...) {
colnames(x)
},
sort = function(x) x,
levels = function(x) levels(x$obs)
)
And the functions used in the class are:
# Define cpdag_to_dag function
cpdag_to_dag <- function(cpdag) {
adj_matrix <- amat(cpdag)
ig <- graph_from_adjacency_matrix(adj_matrix, mode = "directed")
if (igraph::is_dag(ig)) {
return(cpdag)
}
directed_arcs <- directed.arcs(cpdag)
undirected_arcs <- undirected.arcs(cpdag)
while (nrow(undirected_arcs) > 0) {
arc <- undirected_arcs[1, , drop = FALSE]
cpdag <- set.arc(cpdag, from = arc[1, 1], to = arc[1, 2])
undirected_arcs <- undirected.arcs(cpdag)
}
return(cpdag)
}
train_bn <- function(data, algorithm) {
if (any(is.na(data))) {
stop("The data contains missing values.")
}
# Train the Bayesian network using the specified algorithm with default BIC score
if (algorithm =="hc") {
bn <- bnlearn::hc(data) # Using BIC by default
} else if (algorithm == "tabu") {
bn <- bnlearn::tabu(data) # Using BIC by default
} else if (algorithm == "gs") {
bn <- bnlearn::gs(data)
bn <- bnlearn::cpdag(bn) # Convert to CPDAG
bn <- cpdag_to_dag(bn) # Convert CPDAG to DAG
} else if (algorithm == "iamb") {
bn <- bnlearn::iamb(data)
bn <- bnlearn::cpdag(bn) # Convert to CPDAG
bn <- cpdag_to_dag(bn) # Convert CPDAG to DAG
} else {
stop("Unsupported algorithm.")
}
return(bn)
}
# Define a function to evaluate the Bayesian network model
evaluate_bn <- function(testData, bn_fitted, target_var) {
# Generate predictions for the target variable
predictions <- predict(bn_fitted, data = testData, node = target_var)
# Compare the predictions with the actual values in the test data
actual_values <- testData[[target_var]]
# Calculate the accuracy
accuracy <- mean(predictions == actual_values, na.rm = TRUE)
# You can also calculate other metrics such as:
confusion <- table(Predicted = predictions, Actual = actual_values)
precision <- diag(confusion) / rowSums(confusion)
recall <- diag(confusion) / colSums(confusion)
f1_score <- 2 * (precision * recall) / (precision + recall)
# Return a list of evaluation metrics
return(list(
accuracy = accuracy,
precision = precision,
recall = recall,
f1_score = f1_score
))
}
user26614834 is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.