Rdatatable · MichaelChirico · Jan 10, 2026 · Jan 10, 2026 · Jan 10, 2026 · Jan 10, 2026
@@ -14,7 +14,7 @@
 
 ### NEW FEATURES
 
-1. `nafill()`, `setnafill()` extended to work on logical and factor vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). Thanks @jangorecki for the request and @MichaelChirico for the PR.
+1. `nafill()`, `setnafill()` extended to work on logical and factor vectors (part of [#3992](https://github.com/Rdatatable/data.table/issues/3992)). `nafill()` works for character vectors, but not yet `setnafill()`. Thanks @jangorecki for the request and @jangorecki and @MichaelChirico for the PRs.
 
 2. `[,showProgress=]` and `options(datatable.showProgress)` now accept an integer to control the progress bar update interval in seconds, allowing finer control over progress reporting frequency; `TRUE` uses the default 3-second interval, [#6514](https://github.com/Rdatatable/data.table/issues/6514). Thanks @ethanbsmith for the report and @ben-schwen for the PR.
 

@@ -112,8 +112,8 @@ x = 1:10
 test(3.01, nafill(x, "locf", fill=0L), x)
 test(3.02, setnafill(list(copy(x)), "locf", fill=0L), list(x))
 test(3.03, setnafill(x, "locf"), error="in-place update is supported only for list")
-test(3.04, nafill(letters[1:5], fill=0), error="must be logical/numeric type, or list/data.table")
-test(3.05, setnafill(list(letters[1:5]), fill=0), error="must be logical/numeric type, or list/data.table")
+test(3.04, nafill(as.raw(x), fill=0), error="not supported")
+test(3.05, setnafill(list(as.raw(x)), fill=0), error="not supported")
 test(3.06, nafill(x, fill=1:2), error="fill must be a vector of length 1.*fcoalesce")
 test(3.07, nafill(x, "locf", fill=1:2), error="fill must be a vector of length 1.*x\\.$")
 test(3.08, nafill(x, fill="asd"), x, warning=c("Coercing.*character.*integer","NAs introduced by coercion"))
@@ -324,42 +324,107 @@ test(11.09, coerceAs(1L, a), error="must not be matrix or array")
 x = c(NA, NA, TRUE, FALSE, NA, NA, FALSE, TRUE, NA, NA)
 test(12.01, nafill(x, "locf"), c(NA, NA, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE))
 test(12.02, nafill(x, "nocb"), c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, NA, NA))
-test(12.03, nafill(x, fill=TRUE), c(TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE))
-test(12.04, nafill(x, fill=0L), c(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE))
-test(12.05, nafill(x, fill=5.0), c(TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE), warning="double.*taken as TRUE")
-test(12.06, nafill(x, fill=Inf), c(TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE), warning="double.*taken as TRUE")
-test(12.07, nafill(x, fill=NA), x)
-test(12.08, nafill(x, fill=NA_integer_), x)
-test(12.09, nafill(x, fill=NA_real_), x)
-test(12.10, nafill(x, fill=NaN), x)
+test(12.03, nafill(x, "locf", fill=TRUE), c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE))
+test(12.04, nafill(x, "nocb", fill=TRUE), c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE))
+test(12.05, nafill(x, fill=TRUE), c(TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE))
+test(12.06, nafill(x, fill=0L), c(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE))
+test(12.07, nafill(x, fill=5.0), c(TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE), warning="double.*taken as TRUE")
+test(12.08, nafill(x, fill=Inf), c(TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE), warning="double.*taken as TRUE")
+test(12.09, nafill(x, fill=NA), x)
+test(12.10, nafill(x, fill=NA_integer_), x)
+test(12.11, nafill(x, fill=NA_real_), x)
+test(12.12, nafill(x, fill=NaN), x)
 
 ## factor input
 x = rep(NA_character_, 10L)
 x[c(3:4, 7:8)] = c("a", "b", "a", "c")
 x = as.factor(x)
 test(13.01, nafill(x, "locf"), replace(replace(x, 5:6, "b"), 9:10, "c"))
 test(13.02, nafill(x, "nocb"), replace(x, c(1:2, 5:6), "a"))
+test(13.03, nafill(x, "locf", fill="b"), replace(replace(x, c(1:2, 5:6), "b"), 9:10, "c"))
+test(13.04, nafill(x, "nocb", fill="a"), replace(x, c(1:2, 5:6, 9:10), "a"))
 x_fill_a = replace(x, c(1:2, 5:6, 9:10), "a")
-test(13.03, nafill(x, fill="a"), x_fill_a)
-test(13.04, nafill(x, fill=1L), x_fill_a)
-test(13.05, nafill(x, fill=1.0), x_fill_a)
-test(13.06, nafill(x, fill=factor("a")), x_fill_a)
-test(13.07, nafill(x, fill=factor("a", levels=levels(x))), x_fill_a)
-test(13.08, nafill(x, fill=factor("a", levels=c("a", "b"))), x_fill_a)
-test(13.09, nafill(x, fill=factor("a", levels=c("a", "d"))), factor(x_fill_a, levels=c("a", "b", "c", "d")))
+test(13.05, nafill(x, fill="a"), x_fill_a)
+test(13.06, nafill(x, fill=1L), x_fill_a)
+test(13.07, nafill(x, fill=1.0), x_fill_a)
+test(13.08, nafill(x, fill=factor("a")), x_fill_a)
+test(13.09, nafill(x, fill=factor("a", levels=levels(x))), x_fill_a)
+test(13.10, nafill(x, fill=factor("a", levels=c("a", "b"))), x_fill_a)
+test(13.11, nafill(x, fill=factor("a", levels=c("a", "d"))), factor(x_fill_a, levels=c("a", "b", "c", "d")))
 x_fill_d = replace(factor(x, levels = c(levels(x), "d")), c(1:2, 5:6, 9:10), "d")
-test(13.10, nafill(x, fill="d"), x_fill_d)
-test(13.11, nafill(x, fill=factor("d", levels=c("a", "b", "c", "d"))), x_fill_d)
-test(13.12, nafill(x, fill=factor("d", levels=c("d", "a", "b", "c"))), x_fill_d)
-test(13.13, nafill(x, fill=factor("d", levels=c("d", "c", "b", "a"))), x_fill_d)
-test(13.14, nafill(x, fill=factor("d", levels=c("b", "c", "d"))), x_fill_d)
-test(13.15, nafill(x, fill=NA), x)
-test(13.16, nafill(x, fill=NA_integer_), x)
-test(13.17, nafill(x, fill=NA_character_), x)
+test(13.12, nafill(x, fill="d"), x_fill_d)
+test(13.13, nafill(x, fill=factor("d", levels=c("a", "b", "c", "d"))), x_fill_d)
+test(13.14, nafill(x, fill=factor("d", levels=c("d", "a", "b", "c"))), x_fill_d)
+test(13.15, nafill(x, fill=factor("d", levels=c("d", "c", "b", "a"))), x_fill_d)
+test(13.16, nafill(x, fill=factor("d", levels=c("b", "c", "d"))), x_fill_d)
+test(13.17, nafill(x, fill=NA), x)
+test(13.18, nafill(x, fill=NA_integer_), x)
+test(13.19, nafill(x, fill=NA_real_), x)
+test(13.20, nafill(x, fill=NA_character_), x)
+
+## character input
+x = c(NA, NA, "a", "b", NA, NA, "c","d", NA, NA)
+test(14.01, nafill(x, fill="unknown"), c("unknown", "unknown", "a", "b", "unknown", "unknown", "c", "d", "unknown", "unknown"))
+test(14.02, nafill(x, fill=NA), x)
+test(14.03, nafill(x, "locf"), c(NA, NA, "a", "b", "b", "b", "c", "d", "d", "d"))
+test(14.04, nafill(x, "nocb"), c("a", "a", "a", "b", "c", "c", "c", "d", NA, NA))
+test(14.05, nafill(x, "locf", fill="unknown"), c("unknown", "unknown", "a", "b", "b", "b", "c", "d", "d", "d"))
+test(14.06, nafill(x, "nocb", fill="unknown"), c("a", "a", "a", "b", "c", "c", "c", "d", "unknown", "unknown"))
+test(14.07, nafill(x, fill=TRUE), c("TRUE", "TRUE", "a", "b", "TRUE", "TRUE", "c", "d", "TRUE", "TRUE"))
+test(14.08, nafill(x, fill=1L), c("1", "1", "a", "b", "1", "1", "c", "d", "1", "1"))
+test(14.09, nafill(x, fill=1.0), c("1", "1", "a", "b", "1", "1", "c", "d", "1", "1"))
+test(14.10, nafill(x, fill=NA_integer_), x)
+test(14.11, nafill(x, fill=NA_real_), x)
+test(14.12, nafill(x, fill=NA_character_), x)
+test(14.13, options=c(datatable.verbose=TRUE),
+     nafill(x, fill="z"), c("z", "z", "a", "b", "z", "z", "c", "d", "z", "z"),
+     output="nafillString: took")
+
+## setnafill
+DT = data.table(l1=c(NA, NA, TRUE, TRUE, NA, NA, FALSE, FALSE, NA, NA),
+                l2=c(NA, NA, FALSE, FALSE, NA, NA, TRUE, TRUE, NA, NA),
+                i1=c(NA, NA, 0:1, NA, NA, 2:3, NA, NA),
+                i2=c(NA, NA, 3:2, NA, NA, 1:0, NA, NA),
+                d1=c(NA, NA, 0.0, 1L, NA, NA, 2:3, NA, NA),
+                d2=c(NA, NA, 3.0, 2L, NA, NA, 1:0, NA, NA),
+                f1=as.factor(c(NA, NA, "a", "b", NA, NA, "b", "c", NA, NA)),
+                f2=as.factor(c(NA, NA, "c", "b", NA, NA, "b", "a", NA, NA)),
+                c1=c(NA, NA, "a", "b", NA, NA, "c", "d", NA, NA),
+                c2=c(NA, NA, "d", "c", NA, NA, "b", "a", NA, NA))
+test(15.01, setnafill(copy(DT), fill=TRUE, cols='l1')$l1,
+            c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE))
+test(15.02, setnafill(copy(DT), fill=TRUE, cols=c('l1', 'l2'))[, .(l1, l2)],
+            data.table(l1=c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE),
+                       l2=c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE)))
+test(15.03, setnafill(copy(DT), fill=9L, cols='i1')$i1,
+            c(9L, 9L, 0:1, 9L, 9L, 2:3, 9L, 9L))
+test(15.04, setnafill(copy(DT), fill=9L, cols=c('i1', 'i2'))[, .(i1, i2)],
+            data.table(i1=c(9L, 9L, 0:1, 9L, 9L, 2:3, 9L, 9L),
+                       i2=c(9L, 9L, 3:2, 9L, 9L, 1:0, 9L, 9L)))
+test(15.05, setnafill(copy(DT), fill=9.0, cols='d1')$d1,
+            c(9.0, 9L, 0:1, 9L, 9L, 2:3, 9L, 9L))
+test(15.06, setnafill(copy(DT), fill=9.0, cols=c('d1', 'd2'))[, .(d1, d2)],
+            data.table(d1=c(9.0, 9L, 0:1, 9L, 9L, 2:3, 9L, 9L),
+                       d2=c(9.0, 9L, 3:2, 9L, 9L, 1:0, 9L, 9L)))
+test(15.07, setnafill(copy(DT), fill="a", cols='f1')$f1,
+            as.factor(c("a", "a", "a", "b", "a", "a", "b", "c", "a", "a")))
+test(15.08, setnafill(copy(DT), fill="a", cols=c('f1', 'f2'))[, .(f1, f2)],
+            data.table(f1=as.factor(c("a", "a", "a", "b", "a", "a", "b", "c", "a", "a")),
+                       f2=as.factor(c("a", "a", "c", "b", "a", "a", "b", "a", "a", "a"))))
+test(15.09, setnafill(DT, fill="z", cols='c1'), error="not yet supported")
+# test(15.10, setnafill(copy(DT), fill="z", cols=c('c1', 'c2'))[, .(c1, c2)],
+#             data.table(c1=c("z", "z", "a", "b", "z", "z", "c", "d", "z", "z"),
+#                        c2=c("z", "z", "d", "c", "z", "z", "b", "a", "z", "z")))
+test(15.11, setnafill(copy(DT), fill=list(TRUE, 9L, 9.0, "a"), cols=c("l1", "i1", "d1", "f1"))[, .(l1, i1, d1, f1)],
+            data.table(l1=c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE),
+                       i1=c(9L, 9L, 0:1, 9L, 9L, 2:3, 9L, 9L),
+                       d1=c(9.0, 9L, 0L, 1L, 9L, 9L, 2:3, 9L, 9L),
+                       f1=as.factor(c("a", "a", "a", "b", "a", "a", "b", "c", "a", "a"))))
+test(15.12, setnafill(DT, cols=c("l1", "c1")), error="not yet supported")
+DT = data.table(l=c(NA, FALSE), i=c(NA, 0L))
+setnafill(DT, fill=list(TRUE, 1L))
+test(15.13, DT, data.table(l=c(TRUE, FALSE), i=1:0))
 
-## logical
-## character
-## factor
 ## Date
 ## POSIXct
 ## IDate

@@ -87,6 +87,31 @@ void nafillInteger64(int64_t *x, uint_fast64_t nx, unsigned int type, int64_t fi
     snprintf(ans->message[0], 500, _("%s: took %.3fs\n"), __func__, omp_get_wtime()-tic);
 }
 
+/*
+  Fill NA_STRING elements of a character vector, writing the result into ans->char_v.
+    x       read-only pointer to the nx input elements
+    type    0: const (every NA becomes 'fill'), 1: locf, 2: nocb
+    fill    used for every NA when type==0; for locf only a leading NA run, and for
+            nocb only a trailing NA run, receives 'fill'
+    ans     result holder; message[0] receives the timing line when verbose
+  Results are written with SET_STRING_ELT, so columns handled here are not filled in parallel.
+*/
+void nafillString(const SEXP *x, uint_fast64_t nx, unsigned int type, SEXP fill, ans_t *ans, bool verbose) {
+  double tic=0.0;
+  if (verbose)
+    tic = omp_get_wtime();
+  if (nx==0) {
+    // zero-length input: nothing to fill; locf would read x[0] and nocb would wrap nx-1
+  } else if (type==0) { // const
+    for (uint_fast64_t i=0; i<nx; i++) {
+      SET_STRING_ELT(ans->char_v, i, x[i]==NA_STRING ? fill : x[i]);
+    }
+  } else if (type==1) { // locf
+    SET_STRING_ELT(ans->char_v, 0, x[0]==NA_STRING ? fill : x[0]);
+    const SEXP* thisans = SEXPPTR_RO(ans->char_v); // takes out STRING_ELT from loop
+    for (uint_fast64_t i=1; i<nx; i++) {
+      SET_STRING_ELT(ans->char_v, i, x[i]==NA_STRING ? thisans[i-1] : x[i]);
+    }
+  } else if (type==2) { // nocb
+    SET_STRING_ELT(ans->char_v, nx-1, x[nx-1]==NA_STRING ? fill : x[nx-1]);
+    const SEXP* thisans = SEXPPTR_RO(ans->char_v); // takes out STRING_ELT from loop
+    for (int_fast64_t i=(int_fast64_t)nx-2; i>=0; i--) { // signed index so the loop can stop below 0
+      SET_STRING_ELT(ans->char_v, i, x[i]==NA_STRING ? thisans[i+1] : x[i]);
+    }
+  }
+  if (verbose)
+    snprintf(ans->message[0], 500, _("%s: took %.3fs\n"), __func__, omp_get_wtime()-tic);
+}
+
 /*
   OpenMP is being used here to parallelize the loop that fills missing values
     over columns of the input data. This includes handling different data types
@@ -113,28 +138,31 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
   if (obj_scalar) {
     if (binplace)
       error(_("'x' argument is atomic vector, in-place update is supported only for list/data.table"));
-    else if (!isReal(obj) && TYPEOF(obj) != INTSXP && !isLogical(obj))
-      error(_("'x' argument must be logical/numeric type, or list/data.table of logical/numeric types"));
+    else if (!isReal(obj) && TYPEOF(obj) != INTSXP && !isLogical(obj) && !isString(obj))
+      error(_("'x' argument (type %s) not supported."), type2char(TYPEOF(obj)));
     SEXP obj1 = obj;
     obj = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list
     SET_VECTOR_ELT(obj, 0, obj1);
   }
   SEXP ricols = PROTECT(colnamesInt(obj, cols, /* check_dups= */ ScalarLogical(TRUE), /* skip_absent= */ ScalarLogical(FALSE))); protecti++; // nafill cols=NULL which turns into seq_along(obj)
   x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++;
   int *icols = INTEGER(ricols);
+  bool any_char = false;
   for (int i=0; i<length(ricols); i++) {
     SEXP this_col = VECTOR_ELT(obj, icols[i]-1);
-    if (!isReal(this_col) && TYPEOF(this_col) != INTSXP && !isLogical(this_col))
-      error(_("'x' argument must be logical/numeric type, or list/data.table of logical/numeric types"));
+    if (isString(this_col)) {
+      any_char = true;
+    } else if (!isReal(this_col) && TYPEOF(this_col) != INTSXP && !isLogical(this_col))
+      error(_("'x' argument (type %s) not supported."), type2char(TYPEOF(this_col)));
     SET_VECTOR_ELT(x, i, this_col);
   }
   R_len_t nx = length(x);
 
   double **dx = (double**)R_alloc(nx, sizeof(*dx));
   int32_t **ix = (int32_t**)R_alloc(nx, sizeof(*ix));
+  const SEXP **sx = (const SEXP**)R_alloc(nx, sizeof(SEXP*));
   int64_t **i64x = (int64_t**)R_alloc(nx, sizeof(*i64x));
   uint_fast64_t *inx = (uint_fast64_t*)R_alloc(nx, sizeof(*inx));
-  SEXP ans = R_NilValue;
   ans_t *vans = (ans_t *)R_alloc(nx, sizeof(*vans));
   for (R_len_t i=0; i<nx; i++) {
     const SEXP xi = VECTOR_ELT(x, i);
@@ -143,21 +171,40 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
     if (isReal(xi)) {
       dx[i] = REAL(xi);
       i64x[i] = (int64_t *)REAL(xi);
-      ix[i] = NULL;
+      ix[i] = NULL; sx[i] = NULL;
+    } else if (isString(xi)) {
+      sx[i] = STRING_PTR_RO(xi);
+      ix[i] = NULL; dx[i] = NULL; i64x[i] = NULL;
     } else {
       ix[i] = INTEGER(xi);
-      dx[i] = NULL;
-      i64x[i] = NULL;
+      dx[i] = NULL; sx[i] = NULL; i64x[i] = NULL;
     }
   }
+  SEXP ans = R_NilValue;
   if (!binplace) {
     ans = PROTECT(allocVector(VECSXP, nx)); protecti++;
     for (R_len_t i=0; i<nx; i++) {
       SET_VECTOR_ELT(ans, i, allocVector(TYPEOF(VECTOR_ELT(x, i)), inx[i]));
       const SEXP ansi = VECTOR_ELT(ans, i);
-      const void *p = isReal(ansi) ? (void *)REAL(ansi) : (void *)INTEGER(ansi);
-      vans[i] = ((ans_t) { .dbl_v=(double *)p, .int_v=(int *)p, .int64_v=(int64_t *)p, .status=0, .message={"\0","\0","\0","\0"} });
+      const void *p;
+      switch (TYPEOF(ansi)) {
+        case LGLSXP:
+          p = LOGICAL(ansi);
+          break;
+        case INTSXP:
+          p = INTEGER(ansi);
+          break;
+        case REALSXP:
+          p = REAL(ansi);
+          break;
+        default:
+          p = ansi;
+          break;
+      }
+      vans[i] = ((ans_t) { .dbl_v=(double *)p, .int_v=(int *)p, .int64_v=(int64_t *)p, .char_v=(SEXP)p, .status=0, .message={"\0","\0","\0","\0"} });
     }
+  } else if (any_char) {
+    error(_("In-place filling of character columns is not yet supported."));
   } else {
     for (R_len_t i=0; i<nx; i++) {
       vans[i] = ((ans_t) { .dbl_v=dx[i], .int_v=ix[i], .int64_v=i64x[i], .status=0, .message={"\0","\0","\0","\0"} });
@@ -200,7 +247,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
       fillp[i] = SEXPPTR_RO(VECTOR_ELT(fill, i)); // do like this so we can use in parallel region
     }
   }
-  #pragma omp parallel for if (nx>1) num_threads(getDTthreads(nx, true))
+  #pragma omp parallel for if (nx>1 && !any_char) num_threads(getDTthreads(nx, true))
   for (R_len_t i=0; i<nx; i++) {
     switch (TYPEOF(VECTOR_ELT(x, i))) {
     case REALSXP : {
@@ -213,6 +260,9 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
     case LGLSXP: case INTSXP : {
       nafillInteger(ix[i], inx[i], itype, hasFill ? ((int32_t *)fillp[i])[0] : NA_INTEGER, &vans[i], verbose);
     } break;
+    case STRSXP : {
+      nafillString(sx[i], inx[i], itype, hasFill ? ((SEXP *)fillp[i])[0] : NA_STRING, &vans[i], verbose);
+    } break;
     }
   }
 

@@ -10,7 +10,7 @@ typedef struct ans_t {
   int32_t *int_v;        // used in nafill
   double *dbl_v;         // used in froll, nafill
   int64_t *int64_v;      // used in nafill
-  //void *char_v;          // to be used in nafill but then must escape parallelism
+  void *char_v;          // ineligible for filling in parallel!
   uint8_t status;        // 0:ok, 1:message, 2:warning, 3:error; unix return signal: {0,1,2}=0, {3}=1
   char message[4][ANS_MSG_SIZE]; // STDOUT: output, STDERR: message, warning, error
 // implicit n_message limit discussed here: https://github.com/Rdatatable/data.table/issues/3423#issuecomment-487722586