Here is an example with sample data
library(caret)
library(logistf)
library(data.table)
library(ggplot2)
# Create a synthetic dataset
set.seed(123)
n <- 100
data <- data.table(
predictor1 = rnorm(n),
predictor2 = rnorm(n),
predictor3 = rnorm(n),
group = factor(sample(c("control", "AD"), n, replace = TRUE))
)
# Normalize the predictors
data_normalized <- as.data.table(scale(data[, .SD, .SDcols = -"group"]))
data_normalized[, group := data$group]
# Split into train and test sets
train_index <- createDataPartition(data_normalized$group, p = .75, list = FALSE)
train <- data_normalized[train_index, ]
test <- data_normalized[-train_index, ]
# Normalize training data
train_proc <- predict(preProcess(train, method = c("center", "scale"), verbose = TRUE), train)
train_proc[, group := factor(group, levels = c("control", "AD"))]
# Define training control
train_control <- trainControl(method = "repeatedcv",
number = 3, repeats = 3,
savePredictions = TRUE,
classProbs = TRUE)
# Define the custom model function
firth_model <- list(
type = "Classification",
library = "logistf",
loop = NULL,
parameters = data.frame(parameter = c("none"), class = c("character"), label = c("none")),
grid = function(x, y, len = NULL, search = "grid") {
data.frame(none = "none")
},
fit = function(x, y, wts, param, lev, last, classProbs, ...) {
data <- as.data.frame(x)
data$group <- y
logistf(group ~ ., data = data, control = logistf.control(maxit = 100), ...)
},
predict = function(modelFit, newdata, submodels = NULL) {
as.factor(ifelse(predict(modelFit, newdata, type = "response") > 0.5, "AD", "control"))
},
prob = function(modelFit, newdata, submodels = NULL) {
preds <- predict(modelFit, newdata, type = "response")
data.frame(control = 1 - preds, AD = preds)
}
)
# Training the model
set.seed(123)
firth.logist.model <- train(train_proc[, .SD, .SDcols = !c("group")],
train_proc$group,
method = firth_model,
trControl = train_control)
print(firth.logist.model)
received the following error:
> firth.logist.model <- train(train_proc[, .SD, .SDcols = !c("group")],
+ train_proc$group,
+ method = firth_model,
+ trControl = train_control)
Something is wrong; all the Accuracy metric values are missing:
Accuracy Kappa
Min. : NA Min. : NA
1st Qu.: NA 1st Qu.: NA
Median : NA Median : NA
Mean :NaN Mean :NaN
3rd Qu.: NA 3rd Qu.: NA
Max. : NA Max. : NA
NA's :1 NA's :1
Error: Stopping
In addition: Warning message:
In nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
There were missing values in resampled performance measures.
I’m trying to compare performance across different models, when I use logistic regression or random forest from caret, the model all works (with the same training, cross validation, and test data), so not sure what’s failing the Firth’s model here.