From cea4bbf06ebc969f2a6d0351e86f552fdda9d5dd Mon Sep 17 00:00:00 2001 From: Ivan K Date: Fri, 19 Dec 2025 14:49:44 +0300 Subject: [PATCH 1/2] dogroups: don't access columns beyond end of table Previously, dogroups() could try to read elements after the (resized) end of over-allocated data.table list, expecting them to be NULL. This didn't crash in practice, but is now explicitly checked for (and disallowed). --- src/dogroups.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dogroups.c b/src/dogroups.c index 740f32f85..373242516 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -308,7 +308,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX error(_("RHS of := is NULL during grouped assignment, but it's not possible to delete parts of a column.")); int vlen = length(RHS); if (vlen>1 && vlen!=grpn) { - SEXP colname = isNull(VECTOR_ELT(dt, INTEGER(lhs)[j]-1)) ? STRING_ELT(newnames, INTEGER(lhs)[j]-origncol-1) : STRING_ELT(dtnames,INTEGER(lhs)[j]-1); + SEXP colname = INTEGER(lhs)[j] > LENGTH(dt) ? STRING_ELT(newnames, INTEGER(lhs)[j]-origncol-1) : STRING_ELT(dtnames,INTEGER(lhs)[j]-1); error(_("Supplied %d items to be assigned to group %d of size %d in column '%s'. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code."),vlen,i+1,grpn,CHAR(colname)); // e.g. in #91 `:=` did not issue recycling warning during grouping. Now it is error not warning. } @@ -316,9 +316,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX int n = LENGTH(VECTOR_ELT(dt, 0)); for (int j=0; j= LENGTH(dt)) { // first time adding to new column if (R_maxLength(dt) < colj+1) internal_error(__func__, "Trying to add new column by reference but tl is full; setalloccol should have run first at R level before getting to this point"); // # nocov target = PROTECT(allocNAVectorLike(RHS, n)); @@ -332,7 +331,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX UNPROTECT(1); SET_STRING_ELT(dtnames, colj, STRING_ELT(newnames, colj-origncol)); copyMostAttrib(RHS, target); // attributes of first group dominate; e.g. initial factor levels come from first group - } + } else + target = VECTOR_ELT(dt, colj); bool copied = false; if (isNewList(target) && anySpecialStatic(RHS, specials)) { // see comments in anySpecialStatic() RHS = PROTECT(copyAsPlain(RHS)); From 20154a3aec1400c2e2373efd118a4aca400a0437 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger Date: Fri, 19 Dec 2025 14:06:34 +0100 Subject: [PATCH 2/2] add NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 107795c91..bec4ada86 100644 --- a/NEWS.md +++ b/NEWS.md @@ -352,6 +352,8 @@ See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. T 26. Grouping by a factor with many groups is now fast again, fixing a timing regression introduced in [#6890](https://github.com/Rdatatable/data.table/pull/6890) where UTF-8 coercion and level remapping were performed unnecessarily, [#7404](https://github.com/Rdatatable/data.table/issues/7404). Thanks @ben-schwen for the report and fix. +27. `dogroups()` no longer reads beyond the resized end of over-allocated data.table list columns, [#7486](https://github.com/Rdatatable/data.table/issues/7486). While this didn't crash in practice, it is now explicitly checked for in recent R versions (r89198+). Thanks @TimTaylor and @aitap for the report and @aitap for the fix. + ### NOTES 1. The following in-progress deprecations have proceeded: