I have a table like this:
set.seed(123)
random_table <- data.frame(
Column1 = sample(1:10, 5, replace = TRUE),
Column2 = sample(1:10, 5, replace = TRUE),
Column3 = sample(1:10, 5, replace = TRUE),
Column4 = sample(1:10, 5, replace = TRUE),
Column5 = sample(1:10, 5, replace = TRUE)
)
Column1 Column2 Column3 Column4 Column5
3 5 5 3 9
3 4 3 8 3
10 6 9 10 4
2 9 9 7 1
6 10 9 10 7
I want to make a function that checks if “n” numbers from this table can sum to some value “m”. When this is possible, I want to record all such combinations (else NULL).
I tried to write a function to do this using the combinat library:
library(combinat)
find_combinations <- function(table, num, target_sum) {
combinations <- combn(as.vector(as.matrix(table)), num)
valid_combinations <- list()
valid_cells <- list()
for (i in 1:ncol(combinations)) {
if (sum(combinations[, i]) == target_sum) {
valid_combinations <- append(valid_combinations, list(combinations[, i]))
cells <- c()
for (value in combinations[, i]) {
cell <- which(table == value, arr.ind = TRUE)[1, ]
cells <- c(cells, paste0(LETTERS[cell[2]], cell[1]))
}
valid_cells <- append(valid_cells, list(cells))
}
}
if (length(valid_combinations) > 0) {
result <- data.frame(
id = seq_along(valid_combinations),
sum = rep(target_sum, length(valid_combinations)),
numbers_selected = sapply(valid_combinations, function(x) paste(x, collapse = ",")),
cells = sapply(valid_cells, function(x) paste(x, collapse = ","))
)
} else {
result <- data.frame(
id = NA,
sum = NA,
numbers_selected = NA,
cells = NA
)
}
return(result)
}
I then called the function for a specific example and removed all duplicates (relative to the cells column):
result <- find_combinations(random_table, num = 4, target_sum = 19)
result$sorted_cells <- sapply(strsplit(result$cells, ","), function(x) paste(sort(x), collapse = ","))
result <- result[!duplicated(result$sorted_cells), ]
result$sorted_cells <- NULL
result$id <- seq_len(nrow(result))
The output looks like this:
id sum numbers_selected cells
1 19 3,3,10,3 A1,A1,A3,A1
2 19 3,3,6,7 A1,A1,A5,D4
3 19 3,3,5,8 A1,A1,B1,D2
4 19 3,3,4,9 A1,A1,B2,B4
5 19 3,10,2,4 A1,A3,A4,B2
Are there any standard ways to do this in R (ex: Finding all possible combinations of numbers from a vector to reach a given sum (No repetitions), Getting all the combination of numbers from a list that would sum to a specific number)? Or do we really have to write a function?
4
This is a restricted subset sum problem. Take a look at RcppAlgos::partitionsGeneral
(also see this vignette).
library(RcppAlgos)
result <- partitionsGeneral(unlist(random_table), 4, target = 19)
dim(result)
#> [1] 328 4
all(rowSums(result) == 19)
#> [1] TRUE
A function to get indices:
f <- function(df, num, target_sum, linear = TRUE) {
x <- unlist(df, 1, 1)
vals <- unique(partitionsGeneral(x, num, target = target_sum))
idx <- array(split(seq_along(x), x)[as.character(vals)], dim(vals))
out <- do.call(rbind, lapply(asplit(idx, 1), comboGrid, repetition = FALSE))
if (!(linear || is.vector(df))){
out[] <- outer(1:nrow(df), LETTERS[1:ncol(df)], (a, b) paste0(b, a))[out]
}
out
}
Testing:
result_idx <- f(random_table, 4, 19)
dim(result_idx)
#> [1] 552 4
# check that each row corresponds to a set of indices of values that sum correctly
x <- result_idx
x[] <- unlist(df, 1, 1)[result_idx]
all(rowSums(x) == 19)
#> [1] TRUE
f(random_table, 4, 19, FALSE)[1:5,]
#> Var1 Var2 Var3 Var4
#> [1,] "E4" "A4" "A5" "A3"
#> [2,] "E4" "A4" "A5" "B5"
#> [3,] "E4" "A4" "A5" "D3"
#> [4,] "E4" "A4" "A5" "D5"
#> [5,] "E4" "A4" "B3" "A3"
Note that for this example, result
and result_idx
don’t have the same number of rows. partitionsGeneral
stops early once it has identified all unique outputs (see the comment below from the package’s author, Joseph Wood).
11
In base R, it might be fun if you implement a custom function in a recursion manner
f <- function(tgt, n, dat = random_table) {
v <- unlist(dat)
k <- seq_along(v)
helper <- function(tgt, n, idx = k) {
if (n == 1 && any(tgt == v)) {
return(as.list(idx[tgt == v]))
}
if ((n == 0 && tgt > 0) || (n > 0 && tgt <= 0) || (n == 1 && !any(tgt == v))) {
return(NULL)
}
unique(
unlist(
lapply(idx, (i) {
lapply(helper(tgt - v[i], n - 1), (u) c(i, u))
}),
recursive = FALSE
))
}
combs <- helper(tgt, n)
loc <- paste0(head(LETTERS, ncol(dat))[col(dat)], row(dat))
do.call(
rbind,
lapply(combs, (j) {
data.frame(
sum = sum(v[j]),
number_selected = toString(v[j]),
cells = toString(loc[j])
)
})
)
}
and you can run res <- f(19, 4)
and will see
> head(res, 20)
sum number_selected cells
1 19 3, 3, 3, 10 A1, A1, A1, A3
2 19 3, 3, 3, 10 A1, A1, A1, B5
3 19 3, 3, 3, 10 A1, A1, A1, D3
4 19 3, 3, 3, 10 A1, A1, A1, D5
5 19 3, 3, 3, 10 A1, A1, A2, A3
6 19 3, 3, 3, 10 A1, A1, A2, B5
7 19 3, 3, 3, 10 A1, A1, A2, D3
8 19 3, 3, 3, 10 A1, A1, A2, D5
9 19 3, 3, 10, 3 A1, A1, A3, A1
10 19 3, 3, 10, 3 A1, A1, A3, A2
11 19 3, 3, 10, 3 A1, A1, A3, C2
12 19 3, 3, 10, 3 A1, A1, A3, D1
13 19 3, 3, 10, 3 A1, A1, A3, E2
14 19 3, 3, 6, 7 A1, A1, A5, D4
15 19 3, 3, 6, 7 A1, A1, A5, E5
16 19 3, 3, 5, 8 A1, A1, B1, D2
17 19 3, 3, 4, 9 A1, A1, B2, B4
18 19 3, 3, 4, 9 A1, A1, B2, C3
19 19 3, 3, 4, 9 A1, A1, B2, C4
20 19 3, 3, 4, 9 A1, A1, B2, C5
2
Given jblood94 excellent answer, this may not be of any added value, but I wrote the following, which provides the cell indices:
library(data.table)
names(random_table) <- LETTERS[1:5]
f <- (tbl,num,target) {
k <- combn(unlist(tbl),4,simplify = F)
k <- k[sapply(k,(i) sum(i)==target)]
rbindlist(lapply(seq_along(k), (i) {
data.table(i, ns = k[[i]],cells = names(k[[i]]))[order(i,ns,cells), lapply(.SD, (s) paste(s,collapse=",")), i]
}))
}
f(random_table, 4, 19)
Output:
i ns cells
<int> <char> <char>
1: 1 3,3,3,10 A1,A2,C2,A3
2: 2 3,3,3,10 A1,A2,D1,A3
3: 3 3,3,3,10 A1,A2,E2,A3
4: 4 3,3,6,7 A1,A2,A5,D4
5: 5 3,3,6,7 A1,A2,A5,E5
---
548: 548 1,3,7,8 E4,D1,E5,D2
549: 549 3,3,4,9 D1,E2,E3,E1
550: 550 1,3,7,8 E4,E2,D4,D2
551: 551 1,3,7,8 E4,E2,E5,D2
552: 552 1,4,7,7 E4,E3,D4,E5
(Also, note that my output is a bit longer than the above answer, but I can’t yet see a problem with what I’m producing.)
2
Another relatively fast way using combinations
from gtools
library(gtools)
names(random_table) <- LETTERS[1:5]
nms <- names(random_table)
getDesSum <- function(dat, out, tsum){
vect <- as.vector(t(dat)); row <- nrow(dat); col <- ncol(dat)
data.frame(do.call(rbind,
apply(combinations(row*col, out, 1:(row*col)), 1, (x){
res <- vect[x]; Sum <- sum(res)
if(Sum == tsum){
list(Sum = Sum,
numbers_selected = res,
cells = paste0(nms[floor((x-1) / col)+1], ((x-1) %% col)+1))
}
})))}
output (limited with head
)
head(getDesSum(random_table, 4, 19))
Sum numbers_selected cells
1 19 3, 5, 5, 6 A1, A2, A3, C2
2 19 3, 5, 5, 6 A1, A2, A3, E1
3 19 3, 5, 3, 8 A1, A2, A4, B4
4 19 3, 5, 9, 2 A1, A2, A5, D1
5 19 3, 5, 3, 8 A1, A2, B1, B4
6 19 3, 5, 4, 7 A1, A2, B2, D4
nrow(getDesSum(random_table, 4, 19))
[1] 552
4