diff --git a/NEWS.md b/NEWS.md index 23e8d5c87..3f21d573e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -32,6 +32,8 @@ 3. `fread("file://...")` works for file URIs with spaces, [#7550](https://github.com/Rdatatable/data.table/issues/7550). Thanks @aitap for the report and @MichaelChirico for the PR. +4. Grouping operations whose `j` is a single-element `list()` wrapping a constant, e.g. `DT[, list(1), by=g]`, are now optimized to avoid per-group allocation overhead, [#712](https://github.com/Rdatatable/data.table/issues/712). Thanks @macrakis for the report and @ben-schwen for the fix. + ## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025 ### BREAKING CHANGE diff --git a/R/data.table.R b/R/data.table.R index 27c985e44..a16e5c850 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1653,6 +1653,14 @@ replace_dot_alias = function(e) { if ( getOption("datatable.optimize")>=1L && (is.call(jsub) || (is.name(jsub) && jsub %chin% c(".SD", ".N"))) ) { # Ability to turn off if problems or to benchmark the benefit # Optimization to reduce overhead of calling lapply over and over for each group oldjsub = jsub + + # Optimization: unwrap a single-element list() in j to avoid per-group allocation + # e.g., list(1) -> 1; applies only when the lone element is non-NULL, not a call, and passes is_constantish() + if (jsub %iscall% "list" && length(jsub) == 2L && !is.null(jsub[[2L]]) && !is.call(jsub[[2L]]) && is_constantish(jsub[[2L]])) { + jsub = jsub[[2L]] + if (verbose) catf("Optimized j from list(constant) to bare constant\n") + } + funi = 1L # Fix for #985 # converted the lapply(.SD, ...) to a function and used below, easier to implement FR #2722 then. 
.massageSD = function(jsub) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fcf78e9f3..3f24ef672 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21978,3 +21978,10 @@ local({ test(2357.1, fread(f), DT) test(2357.2, fread(paste0("file://", f)), DT) }) + +# dt[, j=list(var), by] is slower than dt[, j=var, by], #712 +dt = data.table(x=rep(1:3, 2L), y=1L) +test(2358.1, dt[, .(1), by=x, verbose=TRUE], dt[, 1, by=x], output="lapply optimization changed j from") +dt = data.table(x=1:5, key="x") +test(2358.2, dt[dt, list(1), by=.EACHI, verbose=TRUE], dt[dt, 1, by=.EACHI], output="lapply optimization changed j from") +test(2358.3, dt[dt, list(x), by=.EACHI, verbose=TRUE], dt[dt, x, by=.EACHI], output="lapply optimization changed j from")