High Performance Benchmarks

Joseph Wood

02/09/2023

This document serves as an overview for measuring the performance of RcppAlgos against other tools for generating combinations, permutations, and partitions. This stackoverflow post: How to generate permutations or combinations of object in R? has some benchmarks. You will note that the examples in that post are relatively small. The benchmarks below will focus on larger examples where performance really matters and for this reason we only consider the packages arrangements, partitions, and RcppAlgos.

Setup Information

For the benchmarks below, we used a 2022 Macbook Air Apple M2 24 GB machine.

library(RcppAlgos)
library(partitions)
library(arrangements)
#> 
#> Attaching package: 'arrangements'
#> The following object is masked from 'package:partitions':
#> 
#>     compositions
library(microbenchmark)

options(digits = 4)
options(width = 90)

pertinent_output <- capture.output(sessionInfo())
cat(paste(pertinent_output[1:3], collapse = "\n"))
#> R version 4.2.1 (2022-06-23)
#> Platform: aarch64-apple-darwin20 (64-bit)
#> Running under: macOS Monterey 12.6

pkgs <- c("RcppAlgos", "arrangements", "partitions", "microbenchmark")
sapply(pkgs, packageVersion, simplify = FALSE)
#> $RcppAlgos
#> [1] '2.7.2'
#> 
#> $arrangements
#> [1] '1.1.9'
#> 
#> $partitions
#> [1] '1.10.7'
#> 
#> $microbenchmark
#> [1] '1.4.7'

numThreads <- min(as.integer(RcppAlgos::stdThreadMax() / 2), 6)
numThreads
#> [1] 4

Combinations

Combinations - Distinct

set.seed(13)
v1 <- sort(sample(100, 30))
m <- 21
t1 <- comboGeneral(v1, m, Parallel = T)
t2 <- combinations(v1, m)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14307150       21
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads),
               cbRcppAlgosSer = comboGeneral(v1, m),
               cbArrangements = combinations(v1, m),
               times = 15, unit = "relative")
#> Warning in microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), :
#> less accurate nanosecond times to avoid potential integer overflows
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    15
#>  cbRcppAlgosSer 3.017 2.990 2.941  2.954 2.877 2.919    15
#>  cbArrangements 3.052 3.025 2.975  2.991 2.915 2.931    15

Combinations - Repetition

v2 <- v1[1:10]
m <- 20
t1 <- comboGeneral(v2, m, repetition = TRUE, nThreads = numThreads)
t2 <- combinations(v2, m, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 10015005       20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v2, m, TRUE, nThreads = numThreads),
               cbRcppAlgosSer = comboGeneral(v2, m, TRUE),
               cbArrangements = combinations(v2, m, replace = TRUE),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    15
#>  cbRcppAlgosSer 2.960 2.848 2.790  2.776 2.739 2.661    15
#>  cbArrangements 2.861 2.842 2.774  2.767 2.722 2.656    15

Combinations - Multisets

myFreqs <- c(2, 4, 4, 5, 3, 2, 2, 2, 3, 4, 1, 4, 2, 5)
v3 <- as.integer(c(1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610))
t1 <- comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads)
t2 <- combinations(freq = myFreqs, k = 20, x = v3)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14594082       20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads),
               cbRcppAlgosSer = comboGeneral(v3, 20, freqs = myFreqs),
               cbArrangements = combinations(freq = myFreqs, k = 20, x = v3),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10
#>  cbRcppAlgosSer 3.096 3.001 2.960  2.951 2.899 2.832    10
#>  cbArrangements 5.729 5.663 5.562  5.571 5.475 5.220    10

Permutations

Permutations - Distinct

v4 <- as.integer(c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59))
t1 <- permuteGeneral(v4, 6, nThreads = numThreads)
t2 <- permutations(v4, 6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8910720       6
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v4, 6, nThreads = numThreads),
               cbRcppAlgosSer = permuteGeneral(v4, 6),
               cbArrangements = permutations(v4, 6),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    15
#>  cbRcppAlgosSer 1.462 1.458 1.314  1.461 1.358 1.136    15
#>  cbArrangements 2.510 2.502 2.316  2.504 2.564 1.744    15


## Indexing permutation example with the partitions package
t1 <- permuteGeneral(11, nThreads = 4)
t2 <- permutations(11)
t3 <- perms(11)

dim(t1)
#> [1] 39916800       11

stopifnot(identical(t1, t2), identical(t1, t(as.matrix(t3))))
rm(t1, t2, t3)
invisible(gc())

microbenchmark(cbRcppAlgosPar = permuteGeneral(11, nThreads = 4),
               cbRcppAlgosSer = permuteGeneral(11),
               cbArrangements = permutations(11),
               cbPartitions   = perms(11),
               times = 5, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000     5
#>  cbRcppAlgosSer 2.564 2.851 2.864  2.845 2.837 3.210     5
#>  cbArrangements 4.314 4.294 4.385  4.264 4.540 4.503     5
#>    cbPartitions 7.928 8.161 8.605  8.818 8.888 9.194     5

Permutations - Repetition

v5 <- v3[1:5]
t1 <- permuteGeneral(v5, 10, repetition = TRUE, nThreads = numThreads)
t2 <- permutations(v5, 10, replace = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9765625      10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v5, 10, TRUE, nThreads = numThreads),
               cbRcppAlgosSer = permuteGeneral(v5, 10, TRUE),
               cbArrangements = permutations(x = v5, k = 10, replace = TRUE),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq    max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.0000    10
#>  cbRcppAlgosSer 2.856 2.751 2.117  2.573 2.382 0.7991    10
#>  cbArrangements 3.456 3.304 2.728  3.092 2.900 1.5888    10

Permutations - Multisets

v6 <- sort(runif(12))
t1 <- permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads)
t2 <- permutations(freq = rep(1:3, 4), k = 7, x = v6)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 19520760        7
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads),
               cbRcppAlgosSer = permuteGeneral(v6, 7, freqs = rep(1:3, 4)),
               cbArrangements = permutations(freq = rep(1:3, 4), k = 7, x = v6),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10
#>  cbRcppAlgosSer 3.579 3.519 3.117  3.432 2.386 2.618    10
#>  cbArrangements 3.893 3.853 3.476  3.745 2.906 2.858    10

Partitions

Partitions - Distinct

All Distinct Partitions

t1 <- comboGeneral(0:140, freqs=c(140, rep(1, 140)),
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 140)
t2 <- partitions(140, distinct = TRUE)
t3 <- diffparts(140)

# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 140
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
                    all(rowSums(t1) == 140), all(rowSums(t2) == 140),
                    all(colSums(t3) == 140))
dim(t1)
#> [1] 9617150      16
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:140, freqs=c(140, rep(1, 140)), nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(0:140, freqs=c(140, rep(1, 140))),
               cbArrangements = partitions(140, distinct = TRUE),
               cbPartitions   = diffparts(140),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr    min     lq   mean median     uq    max neval
#>  cbRcppAlgosPar  1.000  1.000  1.000  1.000  1.000  1.000    10
#>  cbRcppAlgosSer  3.355  3.322  2.797  2.613  2.614  2.469    10
#>  cbArrangements  2.680  2.638  2.213  1.997  2.123  1.973    10
#>    cbPartitions 18.605 18.942 15.279 14.298 13.860 13.067    10

Restricted Distinct Partitions

t1 <- comboGeneral(160, 10,
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 160)
t2 <- partitions(160, 10, distinct = TRUE)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8942920      10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(160, 10, nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(160, 10),
               cbArrangements = partitions(160, 10, distinct = TRUE),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    10
#>  cbRcppAlgosSer 3.381 3.358 3.369  3.245 3.538 3.454    10
#>  cbArrangements 4.460 4.445 4.347  4.295 4.104 4.368    10

Partitions - Repetition

All Partitions

t1 <- comboGeneral(0:65, repetition = TRUE, constraintFun = "sum",
                   comparisonFun = "==", limitConstraints = 65)
t2 <- partitions(65)
t3 <- parts(65)

# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 65
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
          all(rowSums(t1) == 65), all(rowSums(t2) == 65),
          all(colSums(t3) == 65))
dim(t1)
#> [1] 2012558      65
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:65, repetition = TRUE,
                                                  nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(0:65, repetition = TRUE),
               cbArrangements = partitions(65),
               cbPartitions   = parts(65),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    20
#>  cbRcppAlgosSer 2.879 2.813 2.238  2.349 2.082 1.730    20
#>  cbArrangements 2.147 2.119 1.682  1.812 1.643 1.115    20
#>    cbPartitions 9.218 9.255 6.911  7.789 6.319 4.357    20

Restricted Partitions

t1 <- comboGeneral(100, 15, TRUE, constraintFun = "sum",
                   comparisonFun = "==", limitConstraints = 100)
t2 <- partitions(100, 15)
stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9921212      15
rm(t1, t2)

# This takes a really long time... not because of restrictedparts,
# but because apply is not that fast. This transformation is
# needed for proper comparisons. As a result, we will compare
# a smaller example
# t3 <- t(apply(as.matrix(restrictedparts(100, 15, include.zero = F)), 2, sort))
t3 <- t(apply(as.matrix(restrictedparts(50, 15, include.zero = F)), 2, sort))
stopifnot(identical(partitions(50, 15), t3))
rm(t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(100, 15, TRUE,
                                                  nThreads = numThreads),
               cbRcppAlgosSer = partitionsGeneral(100, 15, TRUE),
               cbArrangements = partitions(100, 15),
               cbPartitions   = restrictedparts(100, 15,
                                                include.zero = FALSE),
               times = 10, unit = "relative")
#> Unit: relative
#>            expr    min     lq   mean median     uq    max neval
#>  cbRcppAlgosPar  1.000  1.000  1.000  1.000  1.000  1.000    10
#>  cbRcppAlgosSer  3.427  3.311  2.961  3.051  2.697  2.614    10
#>  cbArrangements  4.257  4.116  3.752  4.037  3.343  3.284    10
#>    cbPartitions 15.389 14.998 13.315 14.072 11.701 11.467    10

Partitions - Multisets

Currenlty, RcppAlgos is the only package capable of efficiently generating partitions of multisets. Therefore, we will only time RcppAlgos and use this as a reference for future improvements.

t1 <- comboGeneral(120, 10, freqs=rep(1:8, 15),
                   constraintFun = "sum", comparisonFun = "==",
                   limitConstraints = 120)
dim(t1)
#> [1] 7340225      10
stopifnot(all(rowSums(t1) == 120))
microbenchmark(cbRcppAlgos = partitionsGeneral(120, 10, freqs=rep(1:8, 15)),
               times = 10)
#> Unit: milliseconds
#>         expr   min    lq mean median    uq   max neval
#>  cbRcppAlgos 285.7 287.8  294    290 291.5 326.7    10

Compositions

Compositions - Repetition

All Compositions (Small case)

t1 <- compositionsGeneral(0:15, repetition = TRUE)
t2 <- arrangements::compositions(15)
t3 <- partitions::compositions(15)

# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 15
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
          all(rowSums(t1) == 15), all(rowSums(t2) == 15),
          all(colSums(t3) == 15))
dim(t1)
#> [1] 16384    15
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosSer = compositionsGeneral(0:15, repetition = TRUE),
               cbArrangements = arrangements::compositions(15),
               cbPartitions   = partitions::compositions(15),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr     min      lq    mean  median      uq     max neval
#>  cbRcppAlgosSer   1.000   1.000   1.000   1.000   1.000   1.000    20
#>  cbArrangements   1.196   1.229   1.204   1.212   1.197   1.209    20
#>    cbPartitions 132.342 139.562 183.336 182.645 213.096 252.348    20

For the next two examples, we will exclude the partitions package for efficiency reasons.

All Compositions (Larger case)

t1 <- compositionsGeneral(0:23, repetition = TRUE)
t2 <- arrangements::compositions(23)

# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 23
stopifnot(identical(dim(t1), dim(t2)), all(rowSums(t1) == 23),
          all(rowSums(t2) == 23))
dim(t1)
#> [1] 4194304      23
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(0:23, repetition = TRUE,
                                                    nThreads = numThreads),
               cbRcppAlgosSer = compositionsGeneral(0:23, repetition = TRUE),
               cbArrangements = arrangements::compositions(23),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    20
#>  cbRcppAlgosSer 3.486 3.377 3.368  3.369 3.349 3.344    20
#>  cbArrangements 3.903 3.781 3.772  3.773 3.752 3.744    20

Restricted Compositions

t1 <- compositionsGeneral(30, 10, repetition = TRUE)
t2 <- arrangements::compositions(30, 10)

stopifnot(identical(t1, t2), all(rowSums(t1) == 30))
dim(t1)
#> [1] 10015005       10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(30, 10, repetition = TRUE,
                                                    nThreads = numThreads),
               cbRcppAlgosSer = compositionsGeneral(30, 10, repetition = TRUE),
               cbArrangements = arrangements::compositions(30, 10),
               times = 20, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>  cbRcppAlgosPar 1.000 1.000 1.000  1.000 1.000 1.000    20
#>  cbRcppAlgosSer 3.043 3.107 3.043  3.076 2.986 3.184    20
#>  cbArrangements 3.174 3.128 3.039  3.097 3.006 2.681    20

Iterators

We will show one example from each category to demonstrate the efficiency of the iterators in RcppAlgos. The results are similar for the rest of the cases not shown.

Combinations

pkg_arrangements <- function(n, total) {
    a <- icombinations(n, as.integer(n / 2))
    for (i in 1:total) a$getnext()
}

pkg_RcppAlgos <- function(n, total) {
    a <- comboIter(n, as.integer(n / 2))
    for (i in 1:total) a@nextIter()
}

total <- comboCount(18, 9)
total
#> [1] 48620

microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(18, total),
               cbArrangements = pkg_arrangements(18, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15
#>  cbArrangements 19.51 19.08 18.56  18.77 18.15 17.38    15

Permutations

pkg_arrangements <- function(n, total) {
    a <- ipermutations(n)
    for (i in 1:total) a$getnext()
}

pkg_RcppAlgos <- function(n, total) {
    a <- permuteIter(n)
    for (i in 1:total) a@nextIter()
}

total <- permuteCount(8)
total
#> [1] 40320

microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(8, total),
               cbArrangements = pkg_arrangements(8, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq mean median    uq   max neval
#>     cbRcppAlgos  1.00  1.00  1.0   1.00  1.00  1.00    15
#>  cbArrangements 19.11 18.87 18.5  18.45 18.09 18.56    15

Partitions

pkg_partitions <- function(n, total) {
    a <- firstpart(n)
    for (i in 1:(total - 1)) a <- nextpart(a)
}

pkg_arrangements <- function(n, total) {
    a <- ipartitions(n)
    for (i in 1:total) a$getnext()
}

pkg_RcppAlgos <- function(n, total) {
    a <- partitionsIter(0:n, repetition = TRUE)
    for (i in 1:total) a@nextIter()
}

total <- partitionsCount(0:40, repetition = TRUE)
total
#> [1] 37338

microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(40, total),
               cbArrangements = pkg_arrangements(40, total),
               cbPartitions   = pkg_partitions(40, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15
#>  cbArrangements 15.49 15.43 14.74  15.25 14.06 13.35    15
#>    cbPartitions 25.08 24.90 23.98  24.61 22.64 22.54    15

Compositions

pkg_partitions <- function(n, total) {
    a <- firstcomposition(n)
    for (i in 1:(total - 1)) a <- nextcomposition(a, FALSE)
}

pkg_arrangements <- function(n, total) {
    a <- icompositions(n)
    for (i in 1:total) a$getnext()
}

pkg_RcppAlgos <- function(n, total) {
    a <- compositionsIter(0:n, repetition = TRUE)
    for (i in 1:total) a@nextIter()
}

total <- compositionsCount(0:15, repetition = TRUE)
total
#> [1] 16384

microbenchmark(cbRcppAlgos    = pkg_RcppAlgos(15, total),
               cbArrangements = pkg_arrangements(15, total),
               cbPartitions   = pkg_partitions(15, total),
               times = 15, unit = "relative")
#> Unit: relative
#>            expr   min    lq  mean median    uq   max neval
#>     cbRcppAlgos  1.00  1.00  1.00   1.00  1.00  1.00    15
#>  cbArrangements 13.81 13.65 13.34  13.65 13.32 11.82    15
#>    cbPartitions 44.86 44.59 44.09  44.89 43.89 40.62    15