From 7967889f2fcd7611debb3127d2fc22c67c2e180b Mon Sep 17 00:00:00 2001 From: badasahog <52379863+badasahog@users.noreply.github.com> Date: Tue, 1 Jul 2025 07:22:14 -0400 Subject: [PATCH] made todo labels consistent --- src/assign.c | 14 +++++++------- src/between.c | 2 +- src/bmerge.c | 14 +++++++------- src/data.table.h | 2 +- src/dogroups.c | 18 +++++++++--------- src/fastmean.c | 2 +- src/fcast.c | 4 ++-- src/fmelt.c | 14 +++++++------- src/forder.c | 41 +++++++++++++++++++++-------------------- src/fread.c | 30 +++++++++++++++--------------- src/freadR.c | 6 +++--- src/fsort.c | 20 ++++++++++---------- src/fwrite.c | 4 ++-- src/fwriteR.c | 2 +- src/gsumm.c | 24 ++++++++++++------------ src/ijoin.c | 10 +++++----- src/init.c | 2 +- src/nafill.c | 2 +- src/nqrecreateindices.c | 4 ++-- src/rbindlist.c | 2 +- src/reorder.c | 2 +- src/subset.c | 6 +++--- src/types.c | 2 +- src/uniqlist.c | 8 ++++---- src/utils.c | 2 +- src/wrappers.c | 2 +- 26 files changed, 120 insertions(+), 119 deletions(-) diff --git a/src/assign.c b/src/assign.c index b7e13aa4fe..178f0fc479 100644 --- a/src/assign.c +++ b/src/assign.c @@ -78,7 +78,7 @@ closest I got to getting it to pass all tests : setAttrib(x, SelfRefSymbol, p = R_MakeExternalPtr( R_NilValue, // for identical() to return TRUE. identical() doesn't look at tag and prot R_NilValue, //getAttrib(x, R_NamesSymbol), // to detect if names has been replaced and its tl lost, e.g. setattr(DT,"names",...) - PROTECT( // needed when --enable-strict-barrier it seems, iiuc. TO DO: test under that flag and remove if not needed. + PROTECT( // needed when --enable-strict-barrier it seems, iiuc. todo: test under that flag and remove if not needed. env // wrap x in env to avoid an infinite loop in object.size() if prot=x were here ) )); @@ -151,10 +151,10 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n) // called from alloccol where n is checked carefully, or from shallow() at R level // where n is set to truelength (i.e. a shallow copy only with no size change) int protecti=0; - SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here? + SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // todo: use growVector here? SHALLOW_DUPLICATE_ATTRIB(newdt, dt); - // TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It + // todo: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It // also increases truelength. Perhaps make that distinction, then, and split out, but marked // so that the next change knows to duplicate. // keepattr() also merely points to the entire attributes list and thus doesn't allow replacing @@ -255,7 +255,7 @@ SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose) if (!selfrefok(dt,verbose)) return shallow(dt,R_NilValue,(n>l) ? n : l); // e.g. test 848 and 851 in R > 3.0.2 // added (n>l) ? ... for #970, see test 1481. - // TO DO: test realloc names if selfrefnamesok (users can setattr(x,"name") themselves for example. + // todo: test realloc names if selfrefnamesok (users can setattr(x,"name") themselves for example. // if (TRUELENGTH(getAttrib(dt,R_NamesSymbol))!=tl) // internal_error(__func__, "tl of dt passes checks, but tl of names (%d) != tl of dt (%d)", tl, TRUELENGTH(getAttrib(dt,R_NamesSymbol))); // # nocov @@ -377,7 +377,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) if (rowsd[i]>=0) numToDo++; } if (verbose) Rprintf(_("Assigning to %d row subset of %d rows\n"), numToDo, nrow); - // TODO: include in message if any rows are assigned several times (e.g. by=.EACHI with dups in i) + // todo: include in message if any rows are assigned several times (e.g. by=.EACHI with dups in i) if (numToDo==0) { // isString(cols) is exclusive to calls from set() if (!length(newcolnames) && !isString(cols)) { @@ -506,7 +506,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) // modify DT by reference. Other than if new columns are being added and the allocVec() fails with // out-of-memory. In that case the user will receive hard halt and know to rerun. if (length(newcolnames)) { - oldtncol = TRUELENGTH(dt); // TO DO: oldtncol can be just called tl now, as we won't realloc here any more. + oldtncol = TRUELENGTH(dt); // todo: oldtncol can be just called tl now, as we won't realloc here any more. if (oldtncol0) diff --git a/src/bmerge.c b/src/bmerge.c index 24a00fb870..e9a5865866 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -152,7 +152,7 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP xoArg, SEXP r } else { // equi joins (or) non-equi join but no multiple matches retFirstArg = PROTECT(allocVector(INTSXP, anslen)); retFirst = INTEGER(retFirstArg); - retLengthArg = PROTECT(allocVector(INTSXP, anslen)); // TODO: no need to allocate length at all when + retLengthArg = PROTECT(allocVector(INTSXP, anslen)); // todo: no need to allocate length at all when retLength = INTEGER(retLengthArg); // mult = "first" / "last" retIndexArg = PROTECT(allocVector(INTSXP, 0)); retIndex = INTEGER(retIndexArg); @@ -162,7 +162,7 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP xoArg, SEXP r // defaults need to populated here as bmerge_r may well not touch many locations, say if the last row of i is before the first row of x. retFirst[j] = nomatch; // default to no match for NA goto below } - // retLength[j] = 0; // TO DO: do this to save the branch below and later branches at R level to set .N to 0 + // retLength[j] = 0; // todo: do this to save the branch below and later branches at R level to set .N to 0 int retLengthVal = (int)(nomatch != 0); for (int j=0; j0, int, 0, 0, ival) // NA_STRING are allowed and joined to; does not do ENC2UTF8 again inside StrCmp - // TO DO: deal with mixed encodings and locale optionally; could StrCmp non-ascii in a thread-safe non-alloc manner + // todo: deal with mixed encodings and locale optionally; could StrCmp non-ascii in a thread-safe non-alloc manner } break; case REALSXP : if (INHERITS(xc, char_integer64)) { @@ -391,7 +391,7 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg const double *icv = REAL(ic); const double *xcv = REAL(xc); const double ival = icv[ir]; - const uint64_t ivalt = dtwiddle(ival); // TO: remove dtwiddle by dealing with NA, NaN, -Inf, +Inf up front + const uint64_t ivalt = dtwiddle(ival); // todo: remove dtwiddle by dealing with NA, NaN, -Inf, +Inf up front #undef ISNAT #undef WRAP #define ISNAT(x) (ISNAN(x)) diff --git a/src/data.table.h b/src/data.table.h index 56b3e34dc0..fb86ed89cb 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -70,7 +70,7 @@ // always be checked in UTF8 locale. This seems to be the best fix Arun could think of to put the encoding issues to rest. // Since the if-statement will fail with the first condition check in "normal" ASCII cases, there shouldn't be huge penalty issues in // most cases. Fix for #66, #69, #469 and #1293 -// TODO: compare 1.9.6 performance with 1.9.7 with huge number of ASCII strings, and again after Jan 2018 when made macro. +// todo: compare 1.9.6 performance with 1.9.7 with huge number of ASCII strings, and again after Jan 2018 when made macro. // Matt moved this to be macro in Jan 2018 so that branch can benefit from branch prediction too wherever used inside loops. // This IS_ASCII will dereference s and that cache fetch is the part that may bite more than the branch, though. Without a call to // to ENC2UTF as all, the pointer value can just be compared by the calling code without dereferencing it. It may still be worth diff --git a/src/dogroups.c b/src/dogroups.c index d4f774568b..9c4e51c548 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -92,11 +92,11 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX double nextTime = (showProgress) ? startTime+3 : 0; // wait 3 seconds before printing progress defineVar(sym_BY, BY = PROTECT(allocVector(VECSXP, ngrpcols)), env); nprotect++; // PROTECT for rchk - SEXP bynames = PROTECT(allocVector(STRSXP, ngrpcols)); nprotect++; // TO DO: do we really need bynames, can we assign names afterwards in one step? + SEXP bynames = PROTECT(allocVector(STRSXP, ngrpcols)); nprotect++; // todo: do we really need bynames, can we assign names afterwards in one step? for (int i=0; i0 || !isNull(lhs)) with i>0 to fix #49 // The above is now to fix #1993, see test 1746. // In cases were no i rows match, '|| estn>-1' ensures that the last empty group creates an empty result. - // TODO: revisit and tidy + // todo: revisit and tidy if (!isNull(lhs) && (istarts[i] == NA_INTEGER || @@ -227,7 +227,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX // Or in the words, this entire section, and this entire dogroups.c file, is now write-barrier compliant from v1.12.10 // and we hope that reference counting on by default from R 4.0 will avoid costly gc()s. } - grpn = 1; // it may not be 1 e.g. test 722. TODO: revisit. + grpn = 1; // it may not be 1 e.g. test 722. todo: revisit. SETLENGTH(I, grpn); INTEGER(I)[0] = 0; for (int j=0; j // the debugging machinery + breakpoint aidee // raise(SIGINT); -// TO DO: margins +// todo: margins SEXP fcast(SEXP lhs, SEXP val, SEXP nrowArg, SEXP ncolArg, SEXP idxArg, SEXP fill, SEXP fill_d, SEXP is_agg, SEXP some_fillArg) { int nrows=INTEGER(nrowArg)[0], ncols=INTEGER(ncolArg)[0]; int nlhs=length(lhs), nval=length(val), *idx = INTEGER(idxArg); @@ -127,7 +127,7 @@ SEXP fcast(SEXP lhs, SEXP val, SEXP nrowArg, SEXP ncolArg, SEXP idxArg, SEXP fil // if (TYPEOF(env) != ENVSXP) error(_("Argument 'env' to (data.table internals) 'cast_order' must be an environment")); // if (TYPEOF(v) == VECSXP) len = length(VECTOR_ELT(v, 0)); // else len = length(v); -// PROTECT(call = lang2(install("forder"), v)); // TODO: save the 'eval' by calling directly the C-function. +// PROTECT(call = lang2(install("forder"), v)); // todo: save the 'eval' by calling directly the C-function. // ans = PROTECT(eval(call, env)); // if (length(ans) == 0) { // forder returns integer(0) if already sorted // UNPROTECT(1); // ans diff --git a/src/fmelt.c b/src/fmelt.c index 33fc6ce1ff..4a12edb0c2 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -17,7 +17,7 @@ SEXP seq_int(int n, int start) { SEXP set_diff(SEXP x, int n) { if (TYPEOF(x) != INTSXP) error(_("'x' must be an integer")); if (n <= 0) error(_("'n' must be a positive integer")); - SEXP table = PROTECT(seq_int(n, 1)); // TODO: using match to 1:n seems odd here, why use match at all + SEXP table = PROTECT(seq_int(n, 1)); // todo: using match to 1:n seems odd here, why use match at all SEXP xmatch = PROTECT(match(x, table, 0)); // Old comment:took a while to realise: matches vec against x - thanks to comment from Matt in assign.c! const int *ixmatch = INTEGER(xmatch); int *buf = (int *) R_alloc(n, sizeof(*buf)); @@ -328,8 +328,8 @@ static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valna if (data->leach[i] > data->lmax) { data->lmax = data->leach[i]; } - data->isidentical[i] = 1; // TODO - why 1 and not Rboolean TRUE? - data->isfactor[i] = 0; // seems to hold 2 below, so not an Rboolean FALSE here. TODO - better name for variable? + data->isidentical[i] = 1; // todo: why 1 and not Rboolean TRUE? + data->isfactor[i] = 0; // seems to hold 2 below, so not an Rboolean FALSE here. todo: better name for variable? data->maxtype[i] = 0; // R_alloc doesn't initialize so careful to here, relied on below for (int j=0; jleach[i]; ++j) { // for each input column. int this_col_num = INTEGER(tmp)[j]; @@ -386,7 +386,7 @@ static SEXP combineFactorLevels(SEXP factorLevels, SEXP target, int * factorType // Finds unique levels directly in one pass with no need to create hash tables. Creates integer factor // too in the same single pass. Previous version called factor(x, levels=unique) where x was type character // and needed hash table. -// TODO keep the original factor columns as factor and use new technique in rbindlist.c. The calling +// todo: keep the original factor columns as factor and use new technique in rbindlist.c. The calling // environments are a little difference hence postponed for now (e.g. rbindlist calls writeNA which // a general purpose combiner would need to know how many to write) // factorType is 1 for factor and 2 for ordered @@ -549,7 +549,7 @@ SEXP getvaluecols(SEXP DT, SEXP dtnames, Rboolean valfactor, Rboolean verbose, s for (int k=0; knrow; ++k) SET_STRING_ELT(target, j*data->nrow + k, STRING_ELT(thiscol, k)); } break; - //TODO complex value type: case CPLXSXP: { } break; + //todo: complex value type: case CPLXSXP: { } break; case REALSXP : { double *dtarget = REAL(target); const double *dthiscol = REAL_RO(thiscol); @@ -601,7 +601,7 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str } if (!varfactor) { SET_VECTOR_ELT(ansvars, 0, target=allocVector(STRSXP, data->totlen)); - if (data->lvalues == 1) {//one value column to output. TODO #5247 change to !data->measure_is_list + if (data->lvalues == 1) {//one value column to output. todo: #5247 change to !data->measure_is_list const int *thisvaluecols = INTEGER(VECTOR_ELT(data->valuecols, 0)); for (int j=0, ansloc=0; jlmax; ++j) { const int thislen = data->narm ? length(VECTOR_ELT(data->not_NA_indices, j)) : data->nrow; @@ -620,7 +620,7 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str SET_VECTOR_ELT(ansvars, 0, target=allocVector(INTSXP, data->totlen)); SEXP levels; int *td = INTEGER(target); - if (data->lvalues == 1) {//one value column to output. TODO #5247 change to !data->measure_is_list + if (data->lvalues == 1) {//one value column to output. todo: #5247 change to !data->measure_is_list SEXP thisvaluecols = VECTOR_ELT(data->valuecols, 0); int len = length(thisvaluecols); levels = PROTECT(allocVector(STRSXP, len)); protecti++; diff --git a/src/forder.c b/src/forder.c index d296f036d2..5c5201216b 100644 --- a/src/forder.c +++ b/src/forder.c @@ -161,7 +161,7 @@ static void flush(void) { #endif // range_* functions return [min,max] of the non-NAs as common uint64_t type -// TODO parallelize these; not a priority according to TIMING_ON though (contiguous read with prefetch) +// todo: parallelize these; not a priority according to TIMING_ON though (contiguous read with prefetch) static void range_i32(const int32_t *x, const int n, uint64_t *out_min, uint64_t *out_max, int *out_na_count) { @@ -577,7 +577,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsA } //Rprintf(_("sortType = %d\n"), sortType); switch(TYPEOF(x)) { - case INTSXP : case LGLSXP : // TODO skip LGL and assume range [0,1] + case INTSXP : case LGLSXP : // todo: skip LGL and assume range [0,1] range_i32(INTEGER(x), nrow, &min, &max, &na_count); break; case CPLXSXP : { @@ -711,7 +711,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsA // So, we may as well bit-pack as done above. // Retaining appearance-order within key-byte-column is still advatangeous for when we do encounter column-grouped data (to avoid data movement); and // the ordering will be appearance-order preserving in that case (just at the byte level). - // TODO: in future we could provide an option to return 'any' group order for efficiency, which would be the byte-appearance order. But for now, the + // todo: in future we could provide an option to return 'any' group order for efficiency, which would be the byte-appearance order. But for now, the // the forder afterwards on o__[f__] (in [.data.table) is not significant. switch(TYPEOF(x)) { @@ -720,7 +720,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsA #pragma omp parallel for num_threads(getDTthreads(nrow, true)) for (int i=0; i0);} // cumulate through 0's too (won't be used) } else { @@ -1139,7 +1140,7 @@ void radix_r(const int from, const int to, const int radix) { // else parallel batches. This is called recursively but only once or maybe twice before resolving to UINT16_MAX branch above int batchSize = MIN(UINT16_MAX, 1+my_n/getDTthreads(my_n, true)); // (my_n-1)/nBatch + 1; //UINT16_MAX == 65535 - int nBatch = (my_n-1)/batchSize + 1; // TODO: make nBatch a multiple of nThreads? + int nBatch = (my_n-1)/batchSize + 1; // todo: make nBatch a multiple of nThreads? int lastBatchSize = my_n - (nBatch-1)*batchSize; uint16_t *counts = calloc(nBatch*256,sizeof(*counts)); uint8_t *ugrps = malloc(sizeof(*ugrps)*nBatch*256); @@ -1160,7 +1161,7 @@ void radix_r(const int from, const int to, const int radix) { free(my_otmp); free(my_ktmp); STOP(_("Failed to allocate 'my_otmp' and/or 'my_ktmp' arrays (%d bytes)."), (int)((sizeof(*my_otmp) + sizeof(*my_ktmp)) * batchSize)); } - // TODO: move these up above and point restrict[me] to them. Easier to Error that way if failed to alloc. + // todo: move these up above and point restrict[me] to them. Easier to Error that way if failed to alloc. #pragma omp for for (int batch=0; batchUINT16_MAX) { anyBig=true; break; } if (anyBig) { @@ -1371,7 +1372,7 @@ SEXP issorted(SEXP x, SEXP by) // Always increasing order with NA's first // Similar to base:is.unsorted but accepts NA at the beginning (standard in data.table and considered sorted) rather than // returning NA when NA present, and is multi-column. - // TODO: test in big steps first to return faster if unsortedness is at the end (a common case of rbind'ing data to end) + // todo: test in big steps first to return faster if unsortedness is at the end (a common case of rbind'ing data to end) // These are all sequential access to x, so quick and cache efficient. Could be parallel by checking continuity at batch boundaries. if (!isNull(by) && !isInteger(by)) internal_error_with_cleanup(__func__, "issorted 'by' must be NULL or integer vector"); @@ -1396,7 +1397,7 @@ SEXP issorted(SEXP x, SEXP by) while (i=xd[i-1]) i++; } else { double *xd = REAL(x); - while (i=dtwiddle(xd[i-1])) i++; // TODO: change to loop over any NA or -Inf at the beginning and then proceed without dtwiddle() (but rounding) + while (i=dtwiddle(xd[i-1])) i++; // todo: change to loop over any NA or -Inf at the beginning and then proceed without dtwiddle() (but rounding) } break; case STRSXP : { @@ -1463,7 +1464,7 @@ SEXP issorted(SEXP x, SEXP by) } break; case 1: { // regular double in REALSXP const double *p = (const double *)colp; - ok = dtwiddle(p[0])>dtwiddle(p[-1]); // TODO: avoid dtwiddle by looping over any NA at the beginning, and remove NumericRounding. + ok = dtwiddle(p[0])>dtwiddle(p[-1]); // todo: avoid dtwiddle by looping over any NA at the beginning, and remove NumericRounding. } break; case 2: { // integer64 in REALSXP const int64_t *p = (const int64_t *)colp; @@ -1474,7 +1475,7 @@ SEXP issorted(SEXP x, SEXP by) if (*p==NA_STRING) { ok = false; // previous value not NA (otherwise memcmp would have returned equal above) so can't be ordered } else { - ok = (NEED2UTF8(p[0]) || NEED2UTF8(p[-1]) ? // TODO: provide user option to choose ascii-only mode + ok = (NEED2UTF8(p[0]) || NEED2UTF8(p[-1]) ? // todo: provide user option to choose ascii-only mode strcmp(CHAR(ENC2UTF8(p[0])), CHAR(ENC2UTF8(p[-1]))) : strcmp(CHAR(p[0]), CHAR(p[-1]))) >= 0; } diff --git a/src/fread.c b/src/fread.c index 4abed040c4..ee7457a395 100644 --- a/src/fread.c +++ b/src/fread.c @@ -502,11 +502,11 @@ static void Field(FieldParseContext *ctx) while(!end_of_field(ch)) ch++; // sep, \r, \n or eof will end *ctx->ch = ch; int fieldLen = (int)(ch - fieldStart); - //if (stripWhite) { // TODO: do this if and the next one together once in bulk afterwards before push + //if (stripWhite) { // todo: do this if and the next one together once in bulk afterwards before push while(fieldLen > 0 && ((ch[-1] == ' ' && stripWhite) || ch[-1] == '\0')) { fieldLen--; ch--; } // this space can't be sep otherwise it would have stopped the field earlier inside end_of_field() //} - if ((fieldLen == 0 && blank_is_a_NAstring) || (fieldLen && end_NA_string(fieldStart) == ch)) fieldLen = INT32_MIN; // TODO - speed up by avoiding end_NA_string when there are none + if ((fieldLen == 0 && blank_is_a_NAstring) || (fieldLen && end_NA_string(fieldStart) == ch)) fieldLen = INT32_MIN; // todo: speed up by avoiding end_NA_string when there are none target->off = (int32_t)(fieldStart - ctx->anchor); target->len = fieldLen; return; @@ -637,7 +637,7 @@ static void StrtoI64(FieldParseContext *ctx) ch += sf; // INT64 range is NA==-9223372036854775808(INT64_MIN) then symmetric [-9223372036854775807,+9223372036854775807]. // A 20+ digit number is caught as too large via the field width check <=19, since leading zeros trigger character type not numeric - // TODO Check tests exist that +9223372036854775808 and +9999999999999999999 are caught as too large. They are stll 19 wide + // todo: Check tests exist that +9223372036854775808 and +9999999999999999999 are caught as too large. They are stll 19 wide // and, fortunately, uint64 can hold 9999999999999999999 (19 9's) so that doesn't overflow uint64. //if ((acc && *start!='0' && acc<=INT64_MAX && (ch-start)<=19) || // (acc==0 && ch-start==1)) { @@ -650,7 +650,7 @@ static void StrtoI64(FieldParseContext *ctx) } // generate freadLookups.h -// TODO: review ERANGE checks and tests; that range outside [1.7e-308,1.7e+308] coerces to [0.0,Inf] +// todo: review ERANGE checks and tests; that range outside [1.7e-308,1.7e+308] coerces to [0.0,Inf] /* f = "~/data.table/src/freadLookups.h" cat("const long double pow10lookup[301] = {\n", file=f, append=FALSE) @@ -658,7 +658,7 @@ for (i in 0:299) cat("1.0E",i,"L,\n", sep="", file=f, append=TRUE) cat("1.0E300L\n};\n", file=f, append=TRUE) */ -// TODO Add faster StrtoD_quick for small precision positive numerics such as prices without a sign or +// todo: Add faster StrtoD_quick for small precision positive numerics such as prices without a sign or // an E (e.g. $.cents) which don't need I64, long double, or Inf/NAN. Could have more than two levels. /** @@ -1416,7 +1416,7 @@ int freadMain(freadMainArgs _args) { // No MAP_POPULATE for faster nrows=10 and to make possible earlier progress bar in row count stage // Mac doesn't appear to support MAP_POPULATE anyway (failed on CRAN when I tried). - // TO DO?: MAP_HUGETLB for Linux but seems to need admin to setup first. My Hugepagesize is 2MB (>>2KB, so promising) + // todo: MAP_HUGETLB for Linux but seems to need admin to setup first. My Hugepagesize is 2MB (>>2KB, so promising) // https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt mmp = mmap(NULL, fileSize, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); // COW for last page lastEOLreplaced #ifdef __EMSCRIPTEN__ @@ -1542,7 +1542,7 @@ int freadMain(freadMainArgs _args) { // We could do this routinely (i.e. when there is a final newline too) but we desire to run all tests through the harder // branch above that replaces the final newline with \0 to test that logic (e.g. test 893 which causes a type bump in the last // field) since we rely on that logic to avoid the copy below when fileSize$4096==0 but there is a final eol ok. - // TODO: portable way to discover relevant page size. 4096 is lowest common denominator, though, and should suffice. + // todo: portable way to discover relevant page size. 4096 is lowest common denominator, though, and should suffice. } else { const char *msg = _("This file is very unusual: it ends abruptly without a final newline, and also its size is a multiple of 4096 bytes. Please properly end the last row with a newline using for example 'echo >> file'"); if (verbose) @@ -1708,7 +1708,7 @@ int freadMain(freadMainArgs _args) { if (quoteRule > 1 && quote) continue; // turn off self-healing quote rule when filling int firstRowNcol = countfields(&ch); int thisncol = 0, maxncol = firstRowNcol, thisRow = 0; - while (ch < eof && ++thisRow < jumpLines) { // TODO: rename 'jumpLines' to 'jumpRows' + while (ch < eof && ++thisRow < jumpLines) { // todo: rename 'jumpLines' to 'jumpRows' thisncol = countfields(&ch); if (thisncol < 0) break; // invalid line; e.g. improper quote if (thisncol > maxncol) maxncol = thisncol; @@ -1804,7 +1804,7 @@ int freadMain(freadMainArgs _args) { quoteRule = topQuoteRule; if (quoteRule > 1 && quote) { DTWARN(_("Found and resolved improper quoting in first %d rows. If the fields are not quoted (e.g. field separator does not appear within any field), try quote=\"\" to avoid this warning."), jumpLines); - // TODO: include line number and text in warning. Could loop again with the standard quote rule to find the line that fails. + // todo: include line number and text in warning. Could loop again with the standard quote rule to find the line that fails. } sep = topSep; whiteChar = (sep == ' ' ? '\t' : (sep == '\t' ? ' ' : 0)); @@ -1813,7 +1813,7 @@ int freadMain(freadMainArgs _args) { // leave pos on the first populated line; that is start of data ch = pos; } else { - // TODO: warning here if topSkip>0 that header is being auto-removed. + // todo: warning here if topSkip>0 that header is being auto-removed. ch = pos = topStart; row1line += topSkip; } @@ -2191,7 +2191,7 @@ int freadMain(freadMainArgs _args) { rowSize1 = 0; rowSize4 = 0; rowSize8 = 0; - size = malloc(sizeof(*size) * ncol); // TODO: remove size[] when we implement Pasha's idea to += size inside processor + size = malloc(sizeof(*size) * ncol); // todo: remove size[] when we implement Pasha's idea to += size inside processor if (!size) STOP(_("Failed to allocate %zu bytes for '%s': %s"), sizeof(*size) * ncol, "size", strerror(errno)); // # nocov nStringCols = 0; @@ -2205,7 +2205,7 @@ int freadMain(freadMainArgs _args) { typeName[tmpType[j]], typeName[type[j]]); } type[j] = tmpType[j]; - // TODO: apply overrides to lower type afterwards and warn about the loss of accuracy then (if any); e.g. "4.0" would be fine to coerce to integer with no warning since + // todo: apply overrides to lower type afterwards and warn about the loss of accuracy then (if any); e.g. "4.0" would be fine to coerce to integer with no warning since // no loss of accuracy but must be read as double for now in case "4.3" occurs out of sample to know if warning about accuracy is needed afterwards. } nUserBumped += type[j] > tmpType[j]; @@ -2400,7 +2400,7 @@ int freadMain(freadMainArgs _args) { int j = 0; //*** START HOT ***// - if (sep != ' ' && !any_number_like_NAstrings) { // TODO: can this 'if' be dropped somehow? Can numeric NAstrings be dealt with afterwards in one go as numeric comparison? + if (sep != ' ' && !any_number_like_NAstrings) { // todo: can this 'if' be dropped somehow? Can numeric NAstrings be dealt with afterwards in one go as numeric comparison? // Try most common and fastest branch first: no whitespace, no quoted numeric, ",," means NA while (j < ncol) { // DTPRINT(_("Field %d: '%.10s' as type %d (tch=%p)\n"), j+1, tch, type[j], tch); @@ -2441,7 +2441,7 @@ int freadMain(freadMainArgs _args) { // (End-of-file) is also dealt with now, as could be the highly unusual line ending /n/r // This way (each line has new opportunity of the fast path) if only a little bit of the file is quoted (e.g. just when commas are present as fwrite does) // then a penalty isn't paid everywhere. - // TODO: reduce(slowerBranch++). So we can see in verbose mode if this is happening too much. + // todo: reduce(slowerBranch++). So we can see in verbose mode if this is happening too much. if (sep == ' ') { while (*tch == ' ') tch++; // multiple sep=' ' at the tLineStart does not mean sep. We're at tLineStart because the fast branch above doesn't run when sep=' ' @@ -2797,7 +2797,7 @@ int freadMain(freadMainArgs _args) { DTPRINT(_("%8.3fs Total\n"), tTot); if (typeBumpMsg) { // if type bumps happened, it's useful to see them at the end after the timing 2 lines up showing the reread time - // TODO - construct and output the copy and pastable colClasses argument so user can avoid the reread time if they are + // todo: construct and output the copy and pastable colClasses argument so user can avoid the reread time if they are // reading this file or files formatted like it many times (say in a production environment). DTPRINT("%s", typeBumpMsg); // # notranslate free(typeBumpMsg); // local scope and only populated in verbose mode diff --git a/src/freadR.c b/src/freadR.c index a302764657..ef99c460ee 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -480,7 +480,7 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size if (none) setAttrib(DT, sym_colClassesAs, R_NilValue); else if (selectRank) setAttrib(DT, sym_colClassesAs, subsetVector(colClassesAs, selectRank)); // reorder the colClassesAs } - // TODO: move DT size calculation into a separate function (since the final size is different from the initial size anyways) + // todo: move DT size calculation into a separate function (since the final size is different from the initial size anyways) size_t DTbytes = RTYPE_SIZEOF(DT)*(ncol-ndrop)*2; // the VECSXP and its column names (exclude global character cache usage) // For each column we could have one of the following cases: @@ -612,7 +612,7 @@ void pushBuffer(ThreadLocalFreadParsingContext *ctx) } source += cnt8; } - done++; // if just one string col near the start, don't loop over the other 10,000 cols. TODO? start on first too + done++; // if just one string col near the start, don't loop over the other 10,000 cols. todo: start on first too } off8 += (size[j] == 8); } @@ -712,7 +712,7 @@ void progress(int p, int eta) { void halt__(bool warn, const char *format, ...) { // Solves: http://stackoverflow.com/questions/18597123/fread-data-table-locks-files - // TODO: always include fnam in the STOP message. For log files etc. + // todo: always include fnam in the STOP message. For log files etc. va_list args; va_start(args, format); char msg[2000]; diff --git a/src/fsort.c b/src/fsort.c index 2f876dd804..11ef811667 100644 --- a/src/fsort.c +++ b/src/fsort.c @@ -1,8 +1,8 @@ #include "data.table.h" -#define INSERT_THRESH 200 // TODO: expose via api and test +#define INSERT_THRESH 200 // tpdo: expose via api and test -static void dinsert(double *x, const int n) { // TODO: if and when twiddled, double => ull +static void dinsert(double *x, const int n) { // todo: if and when twiddled, double => ull if (n<2) return; for (int i=1; imyMax) myMax=*d; d++; @@ -165,14 +165,14 @@ SEXP fsort(SEXP x, SEXP verboseArg) { t[2] = wallclock(); double min=mins[0], max=maxs[0]; for (int i=1; imax) max=maxs[i]; } free(mins); free(maxs); if (verbose) Rprintf(_("Range = [%g,%g]\n"), min, max); if (min < 0.0) error(_("Cannot yet handle negatives.")); - // TODO: -0ULL should allow negatives + // todo: -0ULL should allow negatives // avoid twiddle function call as expensive in recent tests (0.34 vs 2.7) // possibly twiddle once to *ans, then untwiddle at the end in a fast parallel sweep @@ -232,7 +232,7 @@ SEXP fsort(SEXP x, SEXP verboseArg) { ansi64[ thisCounts[(*source - minULL) >> shift]++ ] = *source; // This assignment to ans is not random access as it may seem, but cache efficient by // design since target pages are written to contiguously. MSBsize * 4k < cache. - // TODO: therefore 16 bit MSB seems too big for this step. Time this step and reduce 16 a lot. + // todo: therefore 16 bit MSB seems too big for this step. Time this step and reduce 16 a lot. // 20MB cache / nth / 4k => MSBsize=160 source++; } @@ -260,7 +260,7 @@ SEXP fsort(SEXP x, SEXP verboseArg) { qsort_data = msbCounts; qsort(order, MSBsize, sizeof(int), qsort_cmp); // find order of the sizes, largest first // Would have liked to define qsort_cmp() inside this function right here, but not sure that's fully portable. - // TODO: time this qsort but likely insignificant. + // todo: time this qsort but likely insignificant. if (verbose) { Rprintf(_("Top 20 MSB counts: ")); for(int i=0; i16) bitshift=nb/2; // TODO: when we have stress-test off mode, do this + // bitshift=MAX(nb-8,0); if (bitshift>16) bitshift=nb/2; // todo: when we have stress-test off mode, do this mask = (1<>bitshift) + 1; - grp = (int *)R_alloc(nrow, sizeof(*grp)); // TODO: use malloc and made this local as not needed globally when all functions here use gather + grp = (int *)R_alloc(nrow, sizeof(*grp)); // todo: use malloc and made this local as not needed globally when all functions here use gather // maybe better to malloc to avoid R's heap. This grp isn't global, so it doesn't need to be R_alloc const int *restrict fp = INTEGER(f); - nBatch = MIN((nrow+1)/2, getDTthreads(nrow, true)*2); // *2 to reduce last-thread-home. TODO: experiment. The higher this is though, the bigger is counts[] + nBatch = MIN((nrow+1)/2, getDTthreads(nrow, true)*2); // *2 to reduce last-thread-home. todo: experiment. The higher this is though, the bigger is counts[] batchSize = MAX(1, (nrow-1)/nBatch); lastBatchSize = nrow - (nBatch-1)*batchSize; // We deliberate use, for example, 40 batches of just 14 rows, to stress-test tests. This strategy proved to be a good one as #3204 immediately came to light. - // TODO: enable stress-test mode in tests only (#3205) which can be turned off by default in release to decrease overhead on small data + // todo: enable stress-test mode in tests only (#3205) which can be turned off by default in release to decrease overhead on small data // if that is established to be biting (it may be fine). if (nBatch<1 || batchSize<1 || lastBatchSize<1) { internal_error(__func__, "nrow=%d ngrp=%d nbit=%d bitshift=%d highSize=%zu nBatch=%zu batchSize=%zu lastBatchSize=%zu\n", // # nocov @@ -114,10 +114,10 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) { const int *restrict op = INTEGER(o); // o is a permutation of 1:nrow int nb = nbit(nrow-1); - int bitshift = MAX(nb-8, 0); // TODO: experiment nb/2. Here it doesn't have to be /2 currently. + int bitshift = MAX(nb-8, 0); // todo: experiment nb/2. Here it doesn't have to be /2 currently. int highSize = ((nrow-1)>>bitshift) + 1; //Rprintf(_("When assigning grp[o] = g, highSize=%d nb=%d bitshift=%d nBatch=%d\n"), highSize, nb, bitshift, nBatch); - int *counts = calloc(nBatch*highSize, sizeof(*counts)); // TODO: cache-line align and make highSize a multiple of 64 + int *counts = calloc(nBatch*highSize, sizeof(*counts)); // todo: cache-line align and make highSize a multiple of 64 int *TMP = malloc(sizeof(*TMP) * nrow*2l); // must multiple the long int otherwise overflow may happen, #4295 if (!counts || !TMP ) { free(counts); free(TMP); // # nocov @@ -154,7 +154,7 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) { const int end = counts[ b*highSize + h ]; const int *restrict p = TMP + b*2*batchSize + start*2; for (int k=start; k // #include // the debugging machinery + breakpoint aidee -// TODO: rewrite/simplify logic -- took me ages to understand what I wrote!! -// TODO: benchmark and parallelise slow regions -// TODO: implement 'lookup' for 'gaps' and 'overlaps' arguments +// todo: rewrite/simplify logic -- took me ages to understand what I wrote!! +// todo: benchmark and parallelise slow regions +// todo: implement 'lookup' for 'gaps' and 'overlaps' arguments SEXP lookup(SEXP ux, SEXP xlen, SEXP indices, SEXP gaps, SEXP overlaps, SEXP multArg, SEXP typeArg, SEXP verbose) { SEXP vv, tt, lookup, type_lookup; @@ -49,7 +49,7 @@ SEXP lookup(SEXP ux, SEXP xlen, SEXP indices, SEXP gaps, SEXP overlaps, SEXP mul } } if (type != WITHIN) { - for (int i=0; i=xn || ixo[j]<=0) { diff --git a/src/rbindlist.c b/src/rbindlist.c index d5b08faa4b..f2150649c8 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -551,7 +551,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg, SEXP ignor if (ret) warning(_("Column %d of item %d: %s"), w+1, i+1, ret); } // e.g. when precision is lost like assigning 3.4 to integer64; test 2007.2 - // TODO: but maxType should handle that and this should never warn + // todo: but maxType should handle that and this should never warn } ansloc += thisnrow; } diff --git a/src/reorder.c b/src/reorder.c index 66f1cc6441..9ad50b0fab 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -40,7 +40,7 @@ SEXP reorder(SEXP x, SEXP order) if (length(order) != nrow) error("nrow(x)[%d]!=length(order)[%d]", nrow, length(order)); // # notranslate int nprotect = 0; - if (ALTREP(order)) { order=PROTECT(copyAsPlain(order)); nprotect++; } // TODO: if it's an ALTREP sequence some optimizations are possible rather than expand + if (ALTREP(order)) { order=PROTECT(copyAsPlain(order)); nprotect++; } // todo: if it's an ALTREP sequence some optimizations are possible rather than expand const int *restrict idx = INTEGER(order); int i=0; diff --git a/src/subset.c b/src/subset.c index 0090555c85..d723dc17f4 100644 --- a/src/subset.c +++ b/src/subset.c @@ -69,7 +69,7 @@ void subsetVectorRaw(SEXP ans, SEXP source, SEXP idx, const bool anyNA) // To go parallel here would need access to NODE_IS_OLDER, at least. Given gcgen, mark and named // are upper bounded and max 3, REFCNT==REFCNTMAX could be checked first and then critical SET_ if not. // Inside that critical just before SET_ it could check REFCNTmax) return "Internal inefficiency: idx contains an item out-of-range. Should have been dealt with earlier."; // e.g. test 1639.64 anyNA |= elem==NA_INTEGER; anyLess |= elem=2x speedup) @@ -147,7 +147,7 @@ SEXP uniqlist(SEXP l, SEXP order) } SEXP uniqlengths(SEXP x, SEXP n) { - // seems very similar to rbindlist.c:uniq_lengths. TODO: centralize into common function + // seems very similar to rbindlist.c:uniq_lengths. todo: centralize into common function if (TYPEOF(x) != INTSXP) error(_("Input argument 'x' to 'uniqlengths' must be an integer vector")); if (TYPEOF(n) != INTSXP || length(n) != 1) error(_("Input argument 'n' to 'uniqlengths' must be an integer vector of length 1")); R_len_t len = length(x); @@ -193,7 +193,7 @@ SEXP rleid(SEXP l, SEXP cols) { break; case STRSXP : same = STRING_ELT(jcol,i)==STRING_ELT(jcol,i-1); - // TODO: do we want to check encodings here now that forder seems to? + // todo: do we want to check encodings here now that forder seems to? // Old comment : forder checks no non-ascii unknown, and either UTF-8 or Latin1 but not both. // So == pointers is ok given that check break; @@ -323,7 +323,7 @@ SEXP nestedid(SEXP l, SEXP cols, SEXP order, SEXP grps, SEXP resetvals, SEXP mul } if (b) break; } - // TODO: move this as the outer for-loop and parallelise.. + // todo: move this as the outer for-loop and parallelise.. // but preferably wait to see if problems with that big non-equi // group sizes do occur that commonly before to invest time here. int tmp=0; diff --git a/src/utils.c b/src/utils.c index ff151a9214..58f5085ec9 100644 --- a/src/utils.c +++ b/src/utils.c @@ -533,7 +533,7 @@ bool isRectangularList(SEXP x) { return isRectangular(x); } -// TODO: use isDataFrame (when included in any R release). +// todo: use isDataFrame (when included in any R release). // isDataTable(x) || isFrame(x) || isRectangularList(x) bool perhapsDataTable(SEXP x) { return isDataTable(x) || isFrame(x) || isRectangularList(x); diff --git a/src/wrappers.c b/src/wrappers.c index d8e361c9dd..fcde677fbf 100644 --- a/src/wrappers.c +++ b/src/wrappers.c @@ -31,7 +31,7 @@ SEXP setattrib(SEXP x, SEXP name, SEXP value) } else { setAttrib(x, name, MAYBE_REFERENCED(value) ? duplicate(value) : value); // duplicate is temp fix to restore R behaviour prior to R-devel change on 10 Jan 2014 (r64724). - // TO DO: revisit. Enough to reproduce is: DT=data.table(a=1:3); DT[2]; DT[,b:=2] + // todo: revisit. Enough to reproduce is: DT=data.table(a=1:3); DT[2]; DT[,b:=2] // ... Error: selfrefnames is ok but tl names [1] != tl [100] } return(R_NilValue);