I have successfully used the ClusterBootstrap package in R in the past to perform hierarchical bootstrapping. I am now attempting to apply it to a new data set, and am receiving an error when using the clusbootglm function to create a glm using the predictors Pain and Drug on P7_Weight which using Dam_Number (the litter ID) to control for litter.
Here is the dput to recreate the data set in question:
library(tidyverse)
library(ClusterBootstrap)
structure(list(Dam_Number = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L), levels =
c("6861",
"6867", "6868", "6870", "7273", "7274", "7275", "7276", "7300",
"7303", "7304"), class = "factor"), Pain_Drug = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), levels =
c("Sham_Saline",
"CCI_Oxy", "CCI_Saline", "Sham_Oxy"), class = "factor"), Pain =
structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Sham",
"CCI"), class = "factor"), Drug = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), levels = c("Saline", "Oxy"),
class = "factor"),
Sex = c("M", "M", "M", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M",
"F", "F", "F", "F", "F", "F", "M", "M", "M", "M", "M", "M",
"F", "F", "F", "F", "M", "M", "M", "M", "M", "M", "F", "F",
"F", "F", "M", "M", "M", "M", "M", "F", "F", "F", "F", "F",
"F", "F", "M", "M", "M", "M", "M", "M", "M", "F", "F", "F",
"F", "F", "F", "F", "F", "M", "M", "M", "M", "M", "M", "M",
"M", "M", "F", "F", "F", "F", "F", "F", "F", "M", "M", "M",
"M", "M", "M", "M", "F", "F", "F", "F", "F", "M", "M", "M",
"M", "M", "M", "M", "F", "F", "F", "F", "F", "M", "F", "F",
"F", "F", "F", "F", "F", "M", "M", "M", "M", "M", "M", "M",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F"), Pup_Number =
c(1,
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17), P7_Weight
= c(14.2,
14.1, 14.3, 14, 13, 14.1, 14.1, 12.9, 13.6, 12.4, 14, 11.5,
12, 11, 11.6, 11.4, 8.7, 13.5, 10, 11.7, 10.9, 11.2, 11.3,
8.9, 11, 10.1, 10.8, 10, 16.5, 16.6, 14.5, 16.3, 14.9, 17.2,
15.8, 16.2, 15.7, 15.2, 15.4, 15.7, 16.3, 16.2, 16.3, 15.2,
16.4, 14.8, 15.2, 16.1, 14.8, 15.3, 14.6, 16, 15.9, 13.2,
15.2, 14.8, 14.8, 14.4, 13.2, 15.4, 12.3, 10.6, 11.5, 10,
12.8, 12.2, 12.7, 10.8, 10.8, 9.2, 10, 10, 10, 8.5, 10, 13.9,
16.5, 13, 16.3, 16.6, 13.8, 12.4, 14.3, 13.5, 15.2, 14.6,
13.1, 14, 15.8, 13.6, 11.7, 16, 14.1, 16.1, 14.2, 15.6, 16.3,
16, 14.5, 15.3, 15.5, 14.7, 13.7, 14.6, 14.1, 15.3, 14.6,
14.5, 13.1, 13.1, 14.2, 13.4, 12.1, 13.8, 13.6, 15.1, 13.2,
15, 15.7, 14.4, 14.7, 13.7, 13.5, 11.7, 12.3, 13.3, 10, 12.9,
12.9, 10, 10, 12.4, 11.3, 11.4, 13.2, 11.2, 12.7, 11.1, 9.5,
10), Ref = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), row.names = c(NA, -140L
), class = c("tbl_df", "tbl", "data.frame"))
When I run the clusbootglm function as below:
model_weight <- clusbootglm(P7_Weight ~ Pain*Drug, data = weights, clusterid = Dam_Number, family = gaussian, B = 5000, confint.level = 0.95, n.cores = 1)
It returns the following error (with traceback included):
'Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
contrasts can be applied only to factors with 2 or more levels
11.stop("contrasts can be applied only to factors with 2 or more levels")
10.`contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]])
9.model.matrix.default(mt, mf, contrasts)
8.model.matrix(mt, mf, contrasts)
7.glm(model, family = family, data = data[obs, ])
6.coef(glm(model, family = family, data = data[obs, ]))
5.doTryCatch(return(expr), name, parentenv, handler)
4.tryCatchOne(expr, names, parentenv, handlers[[1L]])
3.tryCatchList(expr, classes, parentenv, handlers)
2.tryCatch(coef(glm(model, family = family, data = data[obs, ])),warning = function(x) rep(as.numeric(NA), p))
1.clusbootglm(P7_Weight ~ Pain * Drug, data = weights, clusterid = Dam_Number, family = gaussian, B = 5000, confint.level = 0.95, n.cores = 1)'
There is a lot of discussion on the site about this error, but all questions return to the idea that there are NAs or constant variables in the data set. There was an excellent function provided in this post to attempt to debug this error. My data set passes this debug with flying colors, stating that all predictors have greater than 2 levels:
'$nlevels
Dam_Number Pain_Drug Pain Drug Sex
11 4 2 2 2
$levels
$levels$Dam_Number
[1] "6861" "6867" "6868" "6870" "7273" "7274" "7275" "7276" "7300" "7303" "7304"
$levels$Pain_Drug
[1] "Sham_Saline" "CCI_Oxy" "CCI_Saline" "Sham_Oxy"
$levels$Pain
[1] "Sham" "CCI"
$levels$Drug
[1] "Saline" "Oxy"
$levels$Sex
[1] "F" "M"'
Interestingly, if I use a combined variable (Pain_Drug) which has 4 levels, the code runs no problem.
model_weight <- clusbootglm(P7_Weight ~ Pain_Drug, data = weights, clusterid = Dam_Number, family = gaussian, B = 5000, confint.level = 0.95, n.cores = 1)
It seems there is a problem with the interaction aspect.
6
The function clusbootglm
handles numeric input:
library(tidyverse)
library(ClusterBootstrap)
weights <- weights |>
mutate(Sex = as.factor(Sex)) |>
mutate(across(-Dam_Number, ~as.numeric(.))) |>
select(Dam_Number, Pain, Drug, P7_Weight)
model_weight <- clusbootglm(P7_Weight ~ Pain*Drug, data = weights,
clusterid = Dam_Number, family = gaussian, B = 5000, confint.level = 0.95, n.cores = 1)
6