From e4e4f9be8364d2db6546ead5991aacd1a34d6d7d Mon Sep 17 00:00:00 2001 From: manmita Date: Thu, 8 Jan 2026 03:15:43 +0530 Subject: [PATCH 01/11] fix(7571): bug fix for narm issue on gforce in int64 case --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 24 ++++++++++++++++++++++++ src/gsumm.c | 3 +-- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 23e8d5c873..b379ea1df1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -32,6 +32,8 @@ 3. `fread("file://...")` works for file URIs with spaces, [#7550](https://github.com/Rdatatable/data.table/issues/7550). Thanks @aitap for the report and @MichaelChirico for the PR. +4. Fixed a bug in GForce grouped sum for `integer64` columns where `sum(value, na.rm = FALSE)` did not always return `NA` when any `NA_integer64_` was present in a group. [#7571](https://github.com/Rdatatable/data.table/issues/7571). Now, groups containing any `NA_integer64_` correctly return `NA`, matching base R behavior. Thanks to @rweberc for the report and @MichaelChirico for reproducible examples. + ## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025 ### BREAKING CHANGE diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fcf78e9f36..58176cdd4a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21978,3 +21978,27 @@ local({ test(2357.1, fread(f), DT) test(2357.2, fread(paste0("file://", f)), DT) }) + +#7571 issue for na.rm on int64 +if (test_bit64) local({ + # integer64 + GForce grouped sum with na.rm = FALSE + # Example 1 from issue: ids 1:8, 9, 9; three leading NAs then 4:10 + dt_short = data.table( + id = c(1:8, 9, 9), + value = c(rep(NA_integer64_, 3L), as.integer64(4:10)) + ) + test(2358.1, + dt_short[, sum(value, na.rm = FALSE), by = id][, V1], + as.integer64(c(NA, NA, NA, 4:8, 19)) + ) + + # Example 2 from issue: ids in pairs, same values; checks multi-row groups + dt_short2 = data.table( + id = rep(1:5, each = 2L), + value = c(rep(NA_integer64_, 3L), as.integer64(4:10)) + ) + test(2358.1, + dt_short2[, sum(value, na.rm = FALSE), by = id][, V1], + as.integer64(c(NA, NA, 11, 15, 19)) + ) +}) \ No newline at end of file diff --git a/src/gsumm.c b/src/gsumm.c index 5970f59194..d69c201982 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -503,11 +503,10 @@ SEXP gsum(SEXP x, SEXP narmArg) const uint16_t *my_low = low + b*batchSize + pos; for (int i=0; i Date: Thu, 8 Jan 2026 03:28:25 +0530 Subject: [PATCH 02/11] fix(7571): test sequencing --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 58176cdd4a..9cb3d3e4e0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21997,7 +21997,7 @@ if (test_bit64) local({ id = rep(1:5, each = 2L), value = c(rep(NA_integer64_, 3L), as.integer64(4:10)) ) - test(2358.1, + test(2358.2, dt_short2[, sum(value, na.rm = FALSE), by = id][, V1], as.integer64(c(NA, NA, 11, 15, 19)) ) From 3819da0707bcb41752781a196cc838cf04760259 Mon Sep 17 00:00:00 2001 From: manmita Date: Thu, 8 Jan 2026 03:39:24 +0530 Subject: [PATCH 03/11] fix(7571): updated the NEWS.md --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index b379ea1df1..289fdc7600 100644 --- a/NEWS.md +++ b/NEWS.md @@ -32,7 +32,7 @@ 3. `fread("file://...")` works for file URIs with spaces, [#7550](https://github.com/Rdatatable/data.table/issues/7550). Thanks @aitap for the report and @MichaelChirico for the PR. -4. Fixed a bug in GForce grouped sum for `integer64` columns where `sum(value, na.rm = FALSE)` did not always return `NA` when any `NA_integer64_` was present in a group. [#7571](https://github.com/Rdatatable/data.table/issues/7571). Now, groups containing any `NA_integer64_` correctly return `NA`, matching base R behavior. Thanks to @rweberc for the report and @MichaelChirico for reproducible examples. +4. - Fixed a bug in GForce grouped sum for `integer64` columns where `sum(value, na.rm = FALSE)` when consecutive NA values are present in the same partition but different group. In that case only the first group was handled ([#7571](https://github.com/Rdatatable/data.table/issues/7571)). Thanks to @rweberc for the report, @MichaelChirico for reproducible examples, and @manmita for the fix. ## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025 From f1952d5df9fa2a432f1886a6789a462c9562eab9 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 7 Jan 2026 14:18:40 -0800 Subject: [PATCH 04/11] trailing newline --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9cb3d3e4e0..4d50124db4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22001,4 +22001,4 @@ if (test_bit64) local({ dt_short2[, sum(value, na.rm = FALSE), by = id][, V1], as.integer64(c(NA, NA, 11, 15, 19)) ) -}) \ No newline at end of file +}) From 2e0e8d6034bb72c195a897143244326902dece08 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 7 Jan 2026 14:20:05 -0800 Subject: [PATCH 05/11] Use $V1 --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4d50124db4..bf7f8b6f90 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21988,7 +21988,7 @@ if (test_bit64) local({ value = c(rep(NA_integer64_, 3L), as.integer64(4:10)) ) test(2358.1, - dt_short[, sum(value, na.rm = FALSE), by = id][, V1], + dt_short[, sum(value, na.rm = FALSE), by = id]$V1, as.integer64(c(NA, NA, NA, 4:8, 19)) ) @@ -21998,7 +21998,7 @@ if (test_bit64) local({ value = c(rep(NA_integer64_, 3L), as.integer64(4:10)) ) test(2358.2, - dt_short2[, sum(value, na.rm = FALSE), by = id][, V1], + dt_short2[, sum(value, na.rm = FALSE), by = id]$V1, as.integer64(c(NA, NA, 11, 15, 19)) ) }) From 3d5032d64a87fbe36259bf5b1772407fcd519f1e Mon Sep 17 00:00:00 2001 From: manmita Date: Thu, 8 Jan 2026 03:53:49 +0530 Subject: [PATCH 06/11] fix(7571): added db optimize 2L --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index bf7f8b6f90..e67962b259 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21987,7 +21987,7 @@ if (test_bit64) local({ id = c(1:8, 9, 9), value = c(rep(NA_integer64_, 3L), as.integer64(4:10)) ) - test(2358.1, + test(2358.1, options=c(datatable.optimize=2L), dt_short[, sum(value, na.rm = FALSE), by = id]$V1, as.integer64(c(NA, NA, NA, 4:8, 19)) ) @@ -21997,7 +21997,7 @@ if (test_bit64) local({ id = rep(1:5, each = 2L), value = c(rep(NA_integer64_, 3L), as.integer64(4:10)) ) - test(2358.2, + test(2358.2, options=c(datatable.optimize=2L), dt_short2[, sum(value, na.rm = FALSE), by = id]$V1, as.integer64(c(NA, NA, 11, 15, 19)) ) From fc59def919bb84fc401b04ff240bf92a747d5596 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 7 Jan 2026 14:25:08 -0800 Subject: [PATCH 07/11] refine NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 289fdc7600..93930e76df 100644 --- a/NEWS.md +++ b/NEWS.md @@ -32,7 +32,7 @@ 3. `fread("file://...")` works for file URIs with spaces, [#7550](https://github.com/Rdatatable/data.table/issues/7550). Thanks @aitap for the report and @MichaelChirico for the PR. -4. - Fixed a bug in GForce grouped sum for `integer64` columns where `sum(value, na.rm = FALSE)` when consecutive NA values are present in the same partition but different group. In that case only the first group was handled ([#7571](https://github.com/Rdatatable/data.table/issues/7571)). Thanks to @rweberc for the report, @MichaelChirico for reproducible examples, and @manmita for the fix. +4. `sum()` by group is correct with GFoce activated ([#7571](https://github.com/Rdatatable/data.table/issues/7571)). Thanks to @rweberc for the report and @manmita for the fix. The issue was caused by a faulty early `break` that spilled between groups. ## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025 From 980a914e98ec29b69d7b38dd1efa5f97fc968107 Mon Sep 17 00:00:00 2001 From: manmita Date: Fri, 9 Jan 2026 00:01:20 +0530 Subject: [PATCH 08/11] fix(7571): add more tests and change to code similar to int for gsum --- inst/tests/tests.Rraw | 21 +++++++++++++++++++++ src/gsumm.c | 12 +++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e67962b259..ed10ba04c1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22001,4 +22001,25 @@ if (test_bit64) local({ dt_short2[, sum(value, na.rm = FALSE), by = id]$V1, as.integer64(c(NA, NA, 11, 15, 19)) ) + + # Test mean for integer64 with NA + dt_mean = data.table( + id = c(1,1,2,2,3,3), + value = as.integer64(c(NA, 10, NA, NA, 5, NA)) + ) + test(2358.3, options=c(datatable.optimize=2L), + dt_mean[, mean(value), by = id]$V1, + c(10, NA, 5) + ) + + # Randomized test: GForce sum vs base::sum for integer64 + set.seed(7571) + DT = data.table(id = sample(letters, 1000, TRUE), value = as.integer64(sample(c(1:100, NA), 1000, TRUE))) + gforce = DT[, .(gforce_sum = sum(value)), by=id] + base = DT[, .(true_sum = base::sum(value)), by=id] + merged = merge(gforce, base, by="id", all=TRUE) + test(2358.4, options=c(datatable.optimize=2L), + all(merged$gforce_sum == merged$true_sum | (is.na(merged$gforce_sum) & is.na(merged$true_sum))), + TRUE + ) }) diff --git a/src/gsumm.c b/src/gsumm.c index d69c201982..902a88e707 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -502,12 +502,14 @@ SEXP gsum(SEXP x, SEXP narmArg) const int64_t *my_gx = gx + b*batchSize + pos; const uint16_t *my_low = low + b*batchSize + pos; for (int i=0; i Date: Fri, 9 Jan 2026 00:28:00 +0530 Subject: [PATCH 09/11] fix(7571): added more tests for mean --- inst/tests/tests.Rraw | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ed10ba04c1..aba5720a60 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22005,21 +22005,28 @@ if (test_bit64) local({ # Test mean for integer64 with NA dt_mean = data.table( id = c(1,1,2,2,3,3), - value = as.integer64(c(NA, 10, NA, NA, 5, NA)) + value = as.integer64(c(NA, NA, NA, 20000000, 5, 3)) ) test(2358.3, options=c(datatable.optimize=2L), - dt_mean[, mean(value), by = id]$V1, - c(10, NA, 5) + dt_mean[, mean(value, na.rm=FALSE), by = id]$V1, + c(NA, NA, 4) ) - # Randomized test: GForce sum vs base::sum for integer64 - set.seed(7571) + # GForce sum vs base::sum for integer64 DT = data.table(id = sample(letters, 1000, TRUE), value = as.integer64(sample(c(1:100, NA), 1000, TRUE))) gforce = DT[, .(gforce_sum = sum(value)), by=id] base = DT[, .(true_sum = base::sum(value)), by=id] merged = merge(gforce, base, by="id", all=TRUE) test(2358.4, options=c(datatable.optimize=2L), - all(merged$gforce_sum == merged$true_sum | (is.na(merged$gforce_sum) & is.na(merged$true_sum))), - TRUE + merged$gforce_sum, merged$true_sum + ) + + # GForce mean vs base::mean for integer64 + DTm = data.table(id = sample(letters, 1000, TRUE), value = as.integer64(sample(c(1:100, NA), 1000, TRUE))) + gforce_m = DTm[, .(gforce_mean = mean(value)), by=id] + base_m = DTm[, .(true_mean = base::mean(value)), by=id] + merged_m = merge(gforce_m, base_m, by="id", all=TRUE) + test(2358.5, options=c(datatable.optimize=2L), + merged$gforce_mean, merged$true_mean ) }) From 4941d97176bd4acf7dc963496363eed84934eaa3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 8 Jan 2026 11:16:46 -0800 Subject: [PATCH 10/11] eliminate intermediate variable --- src/gsumm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/gsumm.c b/src/gsumm.c index 902a88e707..ed0bc1b56c 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -502,11 +502,10 @@ SEXP gsum(SEXP x, SEXP narmArg) const int64_t *my_gx = gx + b*batchSize + pos; const uint16_t *my_low = low + b*batchSize + pos; for (int i=0; i Date: Thu, 8 Jan 2026 11:18:43 -0800 Subject: [PATCH 11/11] NEWS again --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index cd3c5bd69b..256c7450ac 100644 --- a/NEWS.md +++ b/NEWS.md @@ -34,7 +34,7 @@ 3. `fread("file://...")` works for file URIs with spaces, [#7550](https://github.com/Rdatatable/data.table/issues/7550). Thanks @aitap for the report and @MichaelChirico for the PR. -4. `sum()` by group is correct with GFoce activated ([#7571](https://github.com/Rdatatable/data.table/issues/7571)). Thanks to @rweberc for the report and @manmita for the fix. The issue was caused by a faulty early `break` that spilled between groups. +4. `sum()` by group is correct with missing entries and GForce activated ([#7571](https://github.com/Rdatatable/data.table/issues/7571)). Thanks to @rweberc for the report and @manmita for the fix. The issue was caused by a faulty early `break` that spilled between groups, and resulted in silently incorrect results! ## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025