Skip to content

Commit 3c06138

Browse files
authored
set automatically allocates new column slots if needed (#7538)
* set automatically allocates new column slots if needed
* use GetOption1 instead of GetOption
* fix test
* change froll test
* remove assign change
* add output statements to test loop
* add helper function
1 parent 7e9907f commit 3c06138

File tree

4 files changed

+26
-4
lines changed

4 files changed

+26
-4
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828

2929
1. `fread()` with `skip=0` and `(header=TRUE|FALSE)` no longer skips the first row when it has fewer fields than subsequent rows, [#7463](https://github.com/Rdatatable/data.table/issues/7463). Thanks @emayerhofer for the report and @ben-schwen for the fix.
3030

31+
2. `set()` now automatically pre-allocates new column slots if needed, similar to what `:=` already does, [#1831](https://github.com/Rdatatable/data.table/issues/1831) [#4100](https://github.com/Rdatatable/data.table/issues/4100). Thanks to @zachokeeffe and @tyner for the report and @ben-schwen for the fix.
32+
3133
## data.table [v1.18.0](https://github.com/Rdatatable/data.table/milestone/37?closed=1) 23 December 2025
3234

3335
### BREAKING CHANGE

R/data.table.R

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2854,10 +2854,20 @@ setcolorder = function(x, neworder=key(x), before=NULL, after=NULL, skip_absent=
28542854
invisible(x)
28552855
}
28562856

2857+
# Helper for set(): decides whether setalloccol() has to be called on x before assigning value.
# Returns a single logical, evaluated in this order:
#   1. TRUE  if truelength(x) <= length(x), i.e. there is no spare column slot;
#   2. FALSE if selfrefok(x, verbose=FALSE) is at least 1;
#   3. TRUE  if value is NULL;
#   4. FALSE if value is not a list;
#   5. otherwise whether any element of the list value is NULL.
.set_needs_alloccol = function(x, value) {
2858+
# automatically allocate more space when truelength <= ncol (table is either full or was loaded from disk)
2859+
if (truelength(x) <= length(x)) return(TRUE)
2860+
if (selfrefok(x, verbose=FALSE) >= 1L) return(FALSE)
2861+
# from here on selfrefok(x) < 1; value can be NULL or a list with NULLs inside, and only those cases need allocation
2862+
if (is.null(value)) return(TRUE)
2863+
if (!is.list(value)) return(FALSE)
2864+
any(vapply_1b(value, is.null))
2865+
}
2866+
28572867
set = function(x,i=NULL,j,value) # low overhead, loopable
28582868
{
28592869
# Need to call setalloccol first when x has no spare column slots, or when removing columns from a table that's not selfrefok, #7488 #4100
2860-
if ((is.null(value) || (is.list(value) && any(vapply_1b(value, is.null)))) && selfrefok(x, verbose=FALSE) < 1L) {
2870+
if (.set_needs_alloccol(x, value)) {
28612871
name = substitute(x)
28622872
setalloccol(x, verbose=FALSE)
28632873
if (is.name(name)) {

inst/tests/froll.Rraw

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2084,7 +2084,8 @@ if (use.fork) {
20842084
test(6010.772, .selfref.ok(ans[[2L]]))
20852085
ans = frollapply(1:2, 2, function(x) list(data.table(x)), fill=list(data.table(NA)), simplify=FALSE)
20862086
test(6010.773, !.selfref.ok(ans[[2L]][[1L]]))
2087-
test(6010.7731, set(ans[[2L]][[1L]],, "newcol", 1L), error="data.table has either been loaded from disk")
2087+
# deactivated by #5443: set() now allocates new column slots itself, so this call is no longer expected to error
2088+
# test(6010.7731, set(ans[[2L]][[1L]],, "newcol", 1L), error="data.table has either been loaded from disk")
20882089
ans = lapply(ans, lapply, setDT)
20892090
test(6010.774, .selfref.ok(ans[[2L]][[1L]])) ## fix after
20902091
ans = frollapply(1:2, 2, function(x) list(data.table(x)), fill=list(data.table(NA)), simplify=function(x) lapply(x, lapply, setDT))

inst/tests/tests.Rraw

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14797,7 +14797,7 @@ test(2016.1, name, "DT")
1479714797
test(2016.2, DT, data.table(a=1:3))
1479814798
test(2016.3, DT[2,a:=4L], data.table(a=INT(1,4,3))) # no error for := when existing column
1479914799
test(2016.4, set(DT,3L,1L,5L), data.table(a=INT(1,4,5))) # no error for set() when existing column
14800-
test(2016.5, set(DT,2L,"newCol",5L), error="either been loaded from disk.*or constructed manually.*Please run setDT.*setalloccol.*on it first") # just set()
14800+
test(2016.5, set(DT,2L,"newCol",5L), data.table(a=INT(1,4,5), newCol=INT(NA,5L,NA))) # works since set overallocates #4100
1480114801
test(2016.6, DT[2,newCol:=6L], data.table(a=INT(1,4,5), newCol=INT(NA,6L,NA))) # := ok (it changes DT in caller)
1480214802
unlink(tt)
1480314803

@@ -19478,7 +19478,7 @@ test(2290.4, DT[, `:=`(a = 2, c := 3)], error="It looks like you re-used `:=` in
1947819478
df = data.frame(a=1:3)
1947919479
setDT(df)
1948019480
attr(df, "att") = 1
19481-
test(2291.1, set(df, NULL, "new", "new"), error="either been loaded from disk.*or constructed manually.*Please run setDT.*setalloccol.*on it first")
19481+
test(2291.1, set(df, NULL, "new", "new"), setattr(data.table(a=1:3, new="new"), "att", 1)) # fixed when calling setalloccol before set #4100
1948219482

1948319483
# ns-qualified bysub error, #6493
1948419484
DT = data.table(a = 1)
@@ -21959,3 +21959,12 @@ test(2355.1, fread(txt, skip=0), data.table(V1 = c("b1", "c1"), a1
2195921959
test(2355.2, fread(txt, skip=0, header=TRUE), data.table(V1 = c("b1", "c1"), a1 = c("b2", "c2"), a2 = c("b3", "c3")), warning="Added an extra default column name")
2196021960
test(2355.3, fread(txt, skip=0, header=FALSE), data.table(V1=character(), V2=character(), V3=character()), warning="Consider fill=TRUE")
2196121961
test(2355.4, fread(txt, skip=0, fill=TRUE), data.table(V1 = c("a1", "b1", "c1"), V2 = c("a2", "b2", "c2"), V3 = c("", "b3", "c3")))
21962+
21963+
# re-overallocate in set if quota is reached #496 #1831 #4100
21964+
DT = data.table()
21965+
test(2356.1, options=c(datatable.alloccol=1L), {for (i in seq(10L)) set(DT, j = paste0("V",i), value = i); ncol(DT)}, 10L)
21966+
DT = structure(list(a = 1, b = 2), class = c("data.table", "data.frame"))
21967+
test(2356.2, options=c(datatable.alloccol=1L), set(DT, j="c", value=3), data.table(a=1, b=2, c=3))
21968+
# ensure := and set are consistent if they need to overallocate
21969+
DT = data.table(); DT2 = data.table()
21970+
test(2356.3, options=c(datatable.alloccol=1L), {for (i in seq(10L)) set(DT, j = sprintf("V%d",i), value = i); DT}, {for (i in seq(10)) DT2[, sprintf("V%d",i) := i]; DT2})

0 commit comments

Comments
 (0)