From 9202261a4641532635d074be2a1fc03da3bf2619 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Fri, 6 Feb 2026 12:51:44 +0100 Subject: [PATCH 01/17] [rntuple] add handle for fields/columns bookkeeping Like with TTree branches, prepare all data and functions before start reading --- modules/rntuple.mjs | 46 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index fe29976ec..3ffb823b4 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1091,8 +1091,50 @@ async function readNextCluster(rntuple, selector) { // TODO args can later be used to filter fields, limit entries, etc. // Create reader and deserialize doubles from the buffer function rntupleProcess(rntuple, selector, args) { - return readHeaderFooter(rntuple).then(() => { - selector.Begin(); + const handle = { + rntuple, // keep rntuple reference + file: rntuple.$file, // keep file reference + selector, // reference on selector + arr: [], // list of special handles per columns, more than one column per field may exist + curr: -1, // current entry ID + current_entry: -1, // current processed entry + simple_read: true, // all baskets in all used branches are in sync, + process_arrays: false // one can process all branches as arrays + }; + + selector.Begin(); + + return readHeaderFooter(rntuple).then(res => { + if (!res) { + selector.Terminate(false); + return selector; + } + + for (let i = 0; i < selector.numBranches(); ++i) { + const name = getSelectorFieldName(selector, i); + if (!name) + throw new Error(`Not able to extract name for field ${i}`); + + const columns = rntuple.fieldToColumns[name]; + if (!columns) + throw new Error(`No columns found for field '${name}' in RNTuple`); + + for (let k = 0; k < columns.length; ++k) { + const item = { + column: columns[k], + coltype: columns[k].coltype + }; + + // special handling of split types + if ((item.coltype >= ENTupleColumnType.kSplitInt16) && (item.coltype <= ENTupleColumnType.kSplitIndex64)) { + item.splittype = item.coltype; + item.coltype -= (ENTupleColumnType.kSplitInt16 - ENTupleColumnType.kInt16); + } + + handle.arr.push(item); + } + } + selector.currentCluster = 0; selector.currentEntry = 0; return readNextCluster(rntuple, selector, args); From 3dcc58684da4b3eccf1130971936d64b8b09c4bf Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Fri, 6 Feb 2026 13:45:54 +0100 Subject: [PATCH 02/17] [rntuple] create reader func for columns Try to mimic functionality which exists for the TTree --- modules/rntuple.mjs | 109 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 3ffb823b4..a12da1411 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -114,6 +114,7 @@ class RBufferReader { } + const ENTupleColumnType = { kBit: 0x00, kByte: 0x01, @@ -1088,6 +1089,98 @@ async function readNextCluster(rntuple, selector) { }); } +function assignReadFunc(item, item0) { + switch (item.coltype) { + case ENTupleColumnType.kBit: { + item.func = function(view, obj) { + obj[this.name] = false; + this.o++; + }; + break; + } + case ENTupleColumnType.kReal64: + item.func = function(view, obj) { + obj[this.name] = view.getFloat64(this.o, LITTLE_ENDIAN); + this.o += 8; + }; + break; + case ENTupleColumnType.kReal32: + item.func = function(view, obj) { + obj[this.name] = view.getFloat32(this.o, LITTLE_ENDIAN); + this.o += 4; + }; + break; + case ENTupleColumnType.kInt64: + case ENTupleColumnType.kIndex64: + item.func = function(view, obj) { + obj[this.name] = view.getBigInt64(this.o, LITTLE_ENDIAN); + this.o += 8; + }; + break; + case ENTupleColumnType.kUInt64: + item.func = function(view, obj) { + obj[this.name] = view.getBigUint64(this.o, LITTLE_ENDIAN); + this.o += 8; + }; + break; + case ENTupleColumnType.kInt32: + case ENTupleColumnType.kIndex32: + item.func = function(view, obj) { + obj[this.name] = view.getInt32(this.o, LITTLE_ENDIAN); + this.o += 4; + }; + break; + case ENTupleColumnType.kUInt32: + item.func = function(view, obj) { + obj[this.name] = view.getUint32(this.o, LITTLE_ENDIAN); + this.o += 4; + }; + break; + case ENTupleColumnType.kInt16: + item.func = function(view, obj) { + obj[this.name] = view.getInt16(this.o, LITTLE_ENDIAN); + this.o += 2; + }; + break; + case ENTupleColumnType.kUInt16: + item.func = function(view, obj) { + obj[this.name] = view.getUint16(this.o, LITTLE_ENDIAN); + this.o += 2; + }; + break; + case ENTupleColumnType.kInt8: + item.func = function(view, obj) { + obj[this.name] = view.getInt8(this.o++); + }; + break; + case ENTupleColumnType.kUInt8: + case ENTupleColumnType.kByte: + item.func = function(view, obj) { + obj[this.name] = view.getUint8(this.o++); + }; + break; + case ENTupleColumnType.kChar: + if (item0) { + item.namecnt = item0.name; + item.func = function(view, obj) { + const len = obj[this.namecnt]; + let s = ''; + for (let i = 0; i < len; ++i) + s += String.fromCharCode(view.getInt8(this.o++)); + obj[this.name] = s; + }; + } else { + item.func = function(view, obj) { + obj[this.name] = String.fromCharCode(view.getInt8(this.o++)); + }; + } + break; + default: + throw new Error(`Unsupported column type: ${item.coltype}`); + } +} + + // TODO args can later be used to filter fields, limit entries, etc. // Create reader and deserialize doubles from the buffer function rntupleProcess(rntuple, selector, args) { @@ -1119,10 +1212,13 @@ function rntupleProcess(rntuple, selector, args) { if (!columns) throw new Error(`No columns found for field '${name}' in RNTuple`); + let item0 = null; + for (let k = 0; k < columns.length; ++k) { const item = { column: columns[k], - coltype: columns[k].coltype + coltype: columns[k].coltype, + splittype: 0 }; // special handling of split types @@ -1131,6 +1227,17 @@ function rntupleProcess(rntuple, selector, args) { item.coltype -= (ENTupleColumnType.kSplitInt16 - ENTupleColumnType.kInt16); } + assignReadFunc(item, item0); + + item.name = selector.nameOfBranch(i); // target object name + + // case when two columns read like for the std::string, + // but most probably for some other cases + if ((columns.length === 2) && (k === 0)) { + item.name = `___indx${i}`; + item0 = item; + } + handle.arr.push(item); } } From d272628192211d5dc752151dc4e85a686120ea05 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Fri, 6 Feb 2026 14:43:33 +0100 Subject: [PATCH 03/17] [rntuple] split old and new process implementation --- modules/rntuple.mjs | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index a12da1411..8f41a1b2c 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1181,9 +1181,7 @@ function assignReadFunc(item, item0) { } -// TODO args can later be used to filter fields, limit entries, etc. -// Create reader and deserialize doubles from the buffer -function rntupleProcess(rntuple, selector, args) { +function rntupleProcessNew(rntuple, selector, args) { const handle = { rntuple, // keep rntuple reference file: rntuple.$file, // keep file reference @@ -1248,6 +1246,22 @@ function rntupleProcess(rntuple, selector, args) { }).then(() => selector); } + +// TODO args can later be used to filter fields, limit entries, etc. +// Create reader and deserialize doubles from the buffer +function rntupleProcess(rntuple, selector, args) { + selector.Begin(); + return readHeaderFooter(rntuple).then(res => { + if (!res) { + selector.Terminate(false); + return selector; + } + selector.currentCluster = 0; + selector.currentEntry = 0; + return readNextCluster(rntuple, selector, args); + }).then(() => selector); +} + class TDrawSelectorTuple extends TDrawSelector { /** @summary Return total number of entries From a0eca144783162458e1b636f0456cb1b7dd57b6a Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Fri, 6 Feb 2026 17:42:12 +0100 Subject: [PATCH 04/17] [rntuple] very first draft of alternative reading --- modules/rntuple.mjs | 202 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 191 insertions(+), 11 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 8f41a1b2c..cdddda6d5 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1093,9 +1093,13 @@ function assignReadFunc(item, item0) { switch (item.coltype) { case ENTupleColumnType.kBit: { item.func = function(view, obj) { + console.log('bit reading not properly implemented'); obj[this.name] = false; this.o++; }; + item.shift = function() { + console.log('bit shift not implemented'); + }; break; } case ENTupleColumnType.kReal64: @@ -1103,12 +1107,14 @@ function assignReadFunc(item, item0) { obj[this.name] = view.getFloat64(this.o, LITTLE_ENDIAN); this.o += 8; }; + item.sz = 8; break; case ENTupleColumnType.kReal32: item.func = function(view, obj) { obj[this.name] = view.getFloat32(this.o, LITTLE_ENDIAN); this.o += 4; }; + item.sz = 4; break; case ENTupleColumnType.kInt64: case ENTupleColumnType.kIndex64: @@ -1116,12 +1122,14 @@ function assignReadFunc(item, item0) { obj[this.name] = view.getBigInt64(this.o, LITTLE_ENDIAN); this.o += 8; }; + item.sz = 8; break; case ENTupleColumnType.kUInt64: item.func = function(view, obj) { obj[this.name] = view.getBigUint64(this.o, LITTLE_ENDIAN); this.o += 8; }; + item.sz = 8; break; case ENTupleColumnType.kInt32: case ENTupleColumnType.kIndex32: @@ -1129,35 +1137,41 @@ function assignReadFunc(item, item0) { obj[this.name] = view.getInt32(this.o, LITTLE_ENDIAN); this.o += 4; }; + item.sz = 4; break; case ENTupleColumnType.kUInt32: item.func = function(view, obj) { obj[this.name] = view.getUint32(this.o, LITTLE_ENDIAN); this.o += 4; }; + item.sz = 4; break; case ENTupleColumnType.kInt16: item.func = function(view, obj) { obj[this.name] = view.getInt16(this.o, LITTLE_ENDIAN); this.o += 2; }; + item.sz = 2; break; case ENTupleColumnType.kUInt16: item.func = function(view, obj) { obj[this.name] = view.getUint16(this.o, LITTLE_ENDIAN); this.o += 2; }; + item.sz = 2; break; case ENTupleColumnType.kInt8: item.func = function(view, obj) { obj[this.name] = view.getInt8(this.o++); }; + item.sz = 1; break; case ENTupleColumnType.kUInt8: case ENTupleColumnType.kByte: item.func = function(view, obj) { obj[this.name] = view.getUint8(this.o++); }; + item.sz = 1; break; case ENTupleColumnType.kChar: if (item0) { @@ -1169,37 +1183,166 @@ function assignReadFunc(item, item0) { s += String.fromCharCode(view.getInt8(this.o++)); obj[this.name] = s; }; + item.shift = function() { + console.log('not implemented'); + }; } else { item.func = function(view, obj) { obj[this.name] = String.fromCharCode(view.getInt8(this.o++)); }; + item.sz = 1; } break; default: throw new Error(`Unsupported column type: ${item.coltype}`); } + + if (item.sz && !item.shift) { + item.shift = function(entries) { + this.o += entries * this.sz; + }; + } +} + +function reconstructBlob(blob, columnDescriptor) { + const originalColtype = columnDescriptor.coltype, + data = recontructUnsplitBuffer(blob, columnDescriptor); + + // Handle split index types + if (originalColtype === ENTupleColumnType.kSplitIndex32 || originalColtype === ENTupleColumnType.kSplitIndex64) + return DecodeDeltaIndex(data.blob, data.coltype).blob; + + // Handle Split Signed Int types + if (originalColtype === ENTupleColumnType.kSplitInt16 || originalColtype === ENTupleColumnType.kSplitInt32 || originalColtype === ENTupleColumnType.kSplitInt64) + return decodeZigzag(data.blob, data.coltype).blob; + + return data.blob; } -function rntupleProcessNew(rntuple, selector, args) { +async function rntupleProcessNew(rntuple, selector, args) { const handle = { rntuple, // keep rntuple reference file: rntuple.$file, // keep file reference selector, // reference on selector arr: [], // list of special handles per columns, more than one column per field may exist curr: -1, // current entry ID + current_cluster: -1, // current cluster to process + current_cluster_first_entry: 0, // first entry in current cluster current_entry: -1, // current processed entry simple_read: true, // all baskets in all used branches are in sync, - process_arrays: false // one can process all branches as arrays + process_arrays: false, // one can process all branches as arrays + firstentry: 0, // first entry in the rntuple + lastentry: 0 // last entry in the rntuple }; - selector.Begin(); + function readNextPortion(inc_cluster) { + if (inc_cluster !== undefined) { + if (inc_cluster === 1) + handle.current_cluster_first_entry += rntuple.builder.clusterSummaries[handle.current_cluster].numEntries; + handle.current_cluster += inc_cluster; + handle.arr.forEach(item => { + item.page = 0; + item.e0 = handle.current_cluster_first_entry; + }); + } - return readHeaderFooter(rntuple).then(res => { - if (!res) { - selector.Terminate(false); + const locations = rntuple.builder.pageLocations[handle.current_cluster]; + if (!locations) { + selector.Terminate(true); return selector; } + let end_of_cluster = 0; + const dataToRead = [], itemsToRead = []; + + // loop over all columns and request buffer + for (let i = 0; i < handle.arr.length; ++i) { + const item = handle.arr[i]; + + if (!item.view) { + const pages = locations[item.id].pages; + while (pages[item.page] && (item.e0 + pages[item.page].numElements < handle.current_entry)) { + item.e0 += pages[item.page].numElements; + item.page++; + } + if (!pages[item.page]) + end_of_cluster++; + else { + item.e1 = item.e0 + pages[item.page].numElements; + itemsToRead.push(item); + dataToRead.push(pages[item.page].locator.offset, pages[item.page].locator.size); + } + } + } + + if (end_of_cluster) { + if (end_of_cluster !== handle.arr.length) + throw new Error('Missmatch at the cluster boundary'); + return readNextPortion(1); + } + + return rntuple.$file.readBuffer(dataToRead).then(blobsRaw => { + const blobs = Array.isArray(blobsRaw) ? blobsRaw : [blobsRaw], + unzipPromises = blobs.map((blob, idx) => { + const item = itemsToRead[idx], + colEntry = locations[item.id], // Access column entry + page = locations[item.page], // requested page + numElements = Number(page.numElements), + elementSize = item.column.bitsOnStorage / 8; + + let expectedSize = numElements * elementSize; + // Special handling for boolean fields + if (item.coltype === ENTupleColumnType.kBit) + expectedSize = Math.ceil(numElements / 8); + + // Check if data is compressed + if ((colEntry.compression === 0) || (blob.byteLength === expectedSize)) + return Promise.resolve(blob); // Uncompressed: use blob directly + + // Try decompression + return R__unzip(blob, expectedSize).then(result => { + return result || blob; // Fallback to original blob ?? + }).catch(err => { + throw new Error(`Failed to unzip page ${idx}: ${err.message}`); + }); + }); + + return Promise.all(unzipPromises).then(unzipBlobs => { + for (let i = 0; i < unzipBlobs.length; ++i) { + const rawblob = unzipBlobs[i], + item = itemsToRead[i]; + // Ensure blob is a DataView + if (!(rawblob instanceof DataView)) + throw new Error(`Invalid blob type for page ${i}: ${Object.prototype.toString.call(rawblob)}`); + + item.view = reconstructBlob(rawblob, item.column); + item.o = 0; + if (item.e0 > handle.current_entry) + item.shift(handle.current_entry - item.e0); // FIXME - string will not work this way + } + + let hasData = true; + + while (hasData) { + for (let i = 0; i < handle.items.length; ++i) { + const item = handle.items[i]; + item.func(item.view, selector.tgtobj); + if (++item.e0 >= item.e1) { + delete item.view; // data is over + hasData = false; + } + } + selector.Process(handle.current_entry++); + } + + return readNextPortion(); + }); + }); + } + + return readHeaderFooter(rntuple).then(res => { + if (!res) + throw new Error('Not able to read header for the RNtuple'); for (let i = 0; i < selector.numBranches(); ++i) { const name = getSelectorFieldName(selector, i); @@ -1213,10 +1356,13 @@ function rntupleProcessNew(rntuple, selector, args) { let item0 = null; for (let k = 0; k < columns.length; ++k) { + // TODO - make extra class for the item const item = { column: columns[k], + id: columns[k].index, coltype: columns[k].coltype, - splittype: 0 + splittype: 0, + page: 0 // current page for the reading }; // special handling of split types @@ -1232,7 +1378,7 @@ function rntupleProcessNew(rntuple, selector, args) { // case when two columns read like for the std::string, // but most probably for some other cases if ((columns.length === 2) && (k === 0)) { - item.name = `___indx${i}`; + item.name = `___indx${i}`; // TODO - use not a target object in the future item0 = item; } @@ -1240,9 +1386,43 @@ function rntupleProcessNew(rntuple, selector, args) { } } - selector.currentCluster = 0; - selector.currentEntry = 0; - return readNextCluster(rntuple, selector, args); + // calculate number of entries + rntuple.builder.clusterSummaries.forEach(summary => { handle.lastentry += summary.numEntries; }); + + if (handle.firstentry >= handle.lastentry) + throw new Error('Not able to find entries in the RNtuple'); + + // select range of entries to process + handle.process_min = handle.firstentry; + handle.process_max = handle.lastentry; + + if (Number.isInteger(args.firstentry) && (args.firstentry > handle.firstentry) && (args.firstentry < handle.lastentry)) + handle.process_min = args.firstentry; + + if (Number.isInteger(args.numentries) && (args.numentries > 0)) + handle.process_max = Math.min(handle.process_max, handle.process_min + args.numentries); + + // first check from which cluster one should start + + for (let indx = 0, emin = 0; indx < rntuple.builder.clusterSummaries.length; ++indx) { + const summary = rntuple.builder.clusterSummaries[indx], + emax = emin + summary.numEntries; + if ((handle.process_min >= emin) && (handle.process_min < emax)) { + handle.current_cluster = indx; + handle.current_cluster_first_entry = emin; + break; + } + emin = emax; + } + + if (handle.current_cluster < 0) + throw new Error(`Not able to find cluster for entry ${handle.process_min} in the RNtuple`); + + handle.current_entry = handle.staged_now = handle.process_min; + + selector.Begin(rntuple); + + return readNextPortion(0); }).then(() => selector); } From 6e3cf928e69ccee0660a3f063f6e256877fb6c0a Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Mon, 9 Feb 2026 09:23:36 +0100 Subject: [PATCH 05/17] [rntuple] first prototype --- modules/rntuple.mjs | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index cdddda6d5..5998da035 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1220,7 +1220,7 @@ function reconstructBlob(blob, columnDescriptor) { } -async function rntupleProcessNew(rntuple, selector, args) { +async function rntupleProcess(rntuple, selector, args) { const handle = { rntuple, // keep rntuple reference file: rntuple.$file, // keep file reference @@ -1261,16 +1261,18 @@ async function rntupleProcessNew(rntuple, selector, args) { if (!item.view) { const pages = locations[item.id].pages; - while (pages[item.page] && (item.e0 + pages[item.page].numElements < handle.current_entry)) { - item.e0 += pages[item.page].numElements; + while (pages[item.page] && (item.e0 + Number(pages[item.page].numElements) < handle.current_entry)) { + item.e0 += Number(pages[item.page].numElements); item.page++; } if (!pages[item.page]) end_of_cluster++; else { - item.e1 = item.e0 + pages[item.page].numElements; + item.e1 = item.e0 + Number(pages[item.page].numElements); itemsToRead.push(item); - dataToRead.push(pages[item.page].locator.offset, pages[item.page].locator.size); + console.log('submit ', pages[item.page].locator.offset, pages[item.page].locator.size) + + dataToRead.push(Number(pages[item.page].locator.offset), pages[item.page].locator.size); } } } @@ -1286,11 +1288,14 @@ async function rntupleProcessNew(rntuple, selector, args) { unzipPromises = blobs.map((blob, idx) => { const item = itemsToRead[idx], colEntry = locations[item.id], // Access column entry - page = locations[item.page], // requested page + page = colEntry.pages[item.page], // requested page numElements = Number(page.numElements), elementSize = item.column.bitsOnStorage / 8; + console.log('page', page) + let expectedSize = numElements * elementSize; + console.log('expected size', expectedSize, numElements, elementSize, item.column.bitsOnStorage) // Special handling for boolean fields if (item.coltype === ENTupleColumnType.kBit) expectedSize = Math.ceil(numElements / 8); @@ -1315,7 +1320,8 @@ async function rntupleProcessNew(rntuple, selector, args) { if (!(rawblob instanceof DataView)) throw new Error(`Invalid blob type for page ${i}: ${Object.prototype.toString.call(rawblob)}`); - item.view = reconstructBlob(rawblob, item.column); + item.view = new DataView(reconstructBlob(rawblob, item.column)); + console.log('item.view', item.view, typeof item.view); item.o = 0; if (item.e0 > handle.current_entry) item.shift(handle.current_entry - item.e0); // FIXME - string will not work this way @@ -1324,12 +1330,13 @@ async function rntupleProcessNew(rntuple, selector, args) { let hasData = true; while (hasData) { - for (let i = 0; i < handle.items.length; ++i) { - const item = handle.items[i]; + for (let i = 0; i < handle.arr.length; ++i) { + const item = handle.arr[i]; item.func(item.view, selector.tgtobj); if (++item.e0 >= item.e1) { delete item.view; // data is over hasData = false; + item.page++; } } selector.Process(handle.current_entry++); @@ -1429,7 +1436,7 @@ async function rntupleProcessNew(rntuple, selector, args) { // TODO args can later be used to filter fields, limit entries, etc. // Create reader and deserialize doubles from the buffer -function rntupleProcess(rntuple, selector, args) { +function rntupleProcessOld(rntuple, selector, args) { selector.Begin(); return readHeaderFooter(rntuple).then(res => { if (!res) { From e462de73b24240d373f99f09b318acb4ad289100 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Mon, 9 Feb 2026 09:50:12 +0100 Subject: [PATCH 06/17] [rntuple] start pages numbering from -1 --- modules/rntuple.mjs | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 5998da035..c847396ca 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1242,8 +1242,10 @@ async function rntupleProcess(rntuple, selector, args) { handle.current_cluster_first_entry += rntuple.builder.clusterSummaries[handle.current_cluster].numEntries; handle.current_cluster += inc_cluster; handle.arr.forEach(item => { - item.page = 0; + item.page = -1; item.e0 = handle.current_cluster_first_entry; + if (item.view) + throw new Error(`still data when processing column ${item.name}`); }); } @@ -1261,17 +1263,19 @@ async function rntupleProcess(rntuple, selector, args) { if (!item.view) { const pages = locations[item.id].pages; - while (pages[item.page] && (item.e0 + Number(pages[item.page].numElements) < handle.current_entry)) { - item.e0 += Number(pages[item.page].numElements); - item.page++; + while (++item.page < pages.length) { + const page = pages[item.page]; + // if current entry inside the page - read buffer + if (handle.current_entry < item.e0 + Number(page.numElements)) + break; + item.e0 += Number(page.numElements); } - if (!pages[item.page]) + if (item.page >= pages.length) { end_of_cluster++; - else { + item.page = -1; + } else { item.e1 = item.e0 + Number(pages[item.page].numElements); itemsToRead.push(item); - console.log('submit ', pages[item.page].locator.offset, pages[item.page].locator.size) - dataToRead.push(Number(pages[item.page].locator.offset), pages[item.page].locator.size); } } @@ -1292,10 +1296,7 @@ async function rntupleProcess(rntuple, selector, args) { numElements = Number(page.numElements), elementSize = item.column.bitsOnStorage / 8; - console.log('page', page) - let expectedSize = numElements * elementSize; - console.log('expected size', expectedSize, numElements, elementSize, item.column.bitsOnStorage) // Special handling for boolean fields if (item.coltype === ENTupleColumnType.kBit) expectedSize = Math.ceil(numElements / 8); @@ -1321,7 +1322,6 @@ async function rntupleProcess(rntuple, selector, args) { throw new Error(`Invalid blob type for page ${i}: ${Object.prototype.toString.call(rawblob)}`); item.view = new DataView(reconstructBlob(rawblob, item.column)); - console.log('item.view', item.view, typeof item.view); item.o = 0; if (item.e0 > handle.current_entry) item.shift(handle.current_entry - item.e0); // FIXME - string will not work this way @@ -1336,7 +1336,6 @@ async function rntupleProcess(rntuple, selector, args) { if (++item.e0 >= item.e1) { delete item.view; // data is over hasData = false; - item.page++; } } selector.Process(handle.current_entry++); @@ -1369,7 +1368,7 @@ async function rntupleProcess(rntuple, selector, args) { id: columns[k].index, coltype: columns[k].coltype, splittype: 0, - page: 0 // current page for the reading + page: -1 // current page for the reading }; // special handling of split types From 30edcbc34093c31c800b02a6b7223369fb63146f Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Mon, 9 Feb 2026 10:00:19 +0100 Subject: [PATCH 07/17] [rntuple] process only specified number of entries --- modules/rntuple.mjs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index c847396ca..73eb211dc 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1339,6 +1339,11 @@ async function rntupleProcess(rntuple, selector, args) { } } selector.Process(handle.current_entry++); + + if (handle.current_entry >= handle.process_max) { + selector.Terminate(true); + return true; + } } return readNextPortion(); From 7e3ad5c08457a2e033ff2274dab9b122d061f65e Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Mon, 9 Feb 2026 10:51:22 +0100 Subject: [PATCH 08/17] [rntuple] create special class for columns reading --- modules/rntuple.mjs | 340 +++++++++++++++++++++++--------------------- 1 file changed, 176 insertions(+), 164 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 73eb211dc..ed64d8d39 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1089,137 +1089,186 @@ async function readNextCluster(rntuple, selector) { }); } -function assignReadFunc(item, item0) { - switch (item.coltype) { - case ENTupleColumnType.kBit: { - item.func = function(view, obj) { - console.log('bit reading not properly implemented'); - obj[this.name] = false; - this.o++; - }; - item.shift = function() { - console.log('bit shift not implemented'); - }; - break; +class ReaderItem { + + constructor(column) { + this.column = column; + this.id = column.index; + this.coltype = column.coltype; + this.splittype = 0; + this.page = -1; // current page for the reading + + // special handling of split types + if ((this.coltype >= ENTupleColumnType.kSplitInt16) && (this.coltype <= ENTupleColumnType.kSplitIndex64)) { + this.splittype = this.coltype; + this.coltype -= (ENTupleColumnType.kSplitInt16 - ENTupleColumnType.kInt16); } - case ENTupleColumnType.kReal64: - item.func = function(view, obj) { - obj[this.name] = view.getFloat64(this.o, LITTLE_ENDIAN); - this.o += 8; - }; - item.sz = 8; - break; - case ENTupleColumnType.kReal32: - item.func = function(view, obj) { - obj[this.name] = view.getFloat32(this.o, LITTLE_ENDIAN); - this.o += 4; - }; - item.sz = 4; - break; - case ENTupleColumnType.kInt64: - case ENTupleColumnType.kIndex64: - item.func = function(view, obj) { - obj[this.name] = view.getBigInt64(this.o, LITTLE_ENDIAN); - this.o += 8; - }; - item.sz = 8; - break; - case ENTupleColumnType.kUInt64: - item.func = function(view, obj) { - obj[this.name] = view.getBigUint64(this.o, LITTLE_ENDIAN); - this.o += 8; - }; - item.sz = 8; - break; - case ENTupleColumnType.kInt32: - case ENTupleColumnType.kIndex32: - item.func = function(view, obj) { - obj[this.name] = view.getInt32(this.o, LITTLE_ENDIAN); - this.o += 4; - }; - item.sz = 4; - break; - case ENTupleColumnType.kUInt32: - item.func = function(view, obj) { - obj[this.name] = view.getUint32(this.o, LITTLE_ENDIAN); - this.o += 4; - }; - item.sz = 4; - break; - case ENTupleColumnType.kInt16: - item.func = function(view, obj) { - obj[this.name] = view.getInt16(this.o, LITTLE_ENDIAN); - this.o += 2; - }; - item.sz = 2; - break; - case ENTupleColumnType.kUInt16: - item.func = function(view, obj) { - obj[this.name] = view.getUint16(this.o, LITTLE_ENDIAN); - this.o += 2; - }; - item.sz = 2; - break; - case ENTupleColumnType.kInt8: - item.func = function(view, obj) { - obj[this.name] = view.getInt8(this.o++); - }; - item.sz = 1; - break; - case ENTupleColumnType.kUInt8: - case ENTupleColumnType.kByte: - item.func = function(view, obj) { - obj[this.name] = view.getUint8(this.o++); - }; - item.sz = 1; - break; - case ENTupleColumnType.kChar: - if (item0) { - item.namecnt = item0.name; - item.func = function(view, obj) { - const len = obj[this.namecnt]; - let s = ''; - for (let i = 0; i < len; ++i) - s += String.fromCharCode(view.getInt8(this.o++)); - obj[this.name] = s; - }; - item.shift = function() { - console.log('not implemented'); + } + + assignReadFunc(item0) { + switch (this.coltype) { + case ENTupleColumnType.kBit: { + this.func = function(obj) { + console.log('bit reading not properly implemented'); + obj[this.name] = false; + this.o++; }; - } else { - item.func = function(view, obj) { - obj[this.name] = String.fromCharCode(view.getInt8(this.o++)); + this.shift = function() { + console.log('bit shift not implemented'); }; - item.sz = 1; + break; } - break; - default: - throw new Error(`Unsupported column type: ${item.coltype}`); + case ENTupleColumnType.kReal64: + this.func = function(obj) { + obj[this.name] = this.view.getFloat64(this.o, LITTLE_ENDIAN); + this.o += 8; + }; + this.sz = 8; + break; + case ENTupleColumnType.kReal32: + this.func = function(obj) { + obj[this.name] = this.view.getFloat32(this.o, LITTLE_ENDIAN); + this.o += 4; + }; + this.sz = 4; + break; + case ENTupleColumnType.kInt64: + case ENTupleColumnType.kIndex64: + this.func = function(obj) { + obj[this.name] = this.view.getBigInt64(this.o, LITTLE_ENDIAN); + this.o += 8; + }; + this.sz = 8; + break; + case ENTupleColumnType.kUInt64: + this.func = function(obj) { + obj[this.name] = this.view.getBigUint64(this.o, LITTLE_ENDIAN); + this.o += 8; + }; + this.sz = 8; + break; + case ENTupleColumnType.kInt32: + case ENTupleColumnType.kIndex32: + this.func = function(obj) { + obj[this.name] = this.view.getInt32(this.o, LITTLE_ENDIAN); + this.o += 4; + }; + this.sz = 4; + break; + case ENTupleColumnType.kUInt32: + this.func = function(obj) { + obj[this.name] = this.view.getUint32(this.o, LITTLE_ENDIAN); + this.o += 4; + }; + this.sz = 4; + break; + case ENTupleColumnType.kInt16: + this.func = function(obj) { + obj[this.name] = this.view.getInt16(this.o, LITTLE_ENDIAN); + this.o += 2; + }; + this.sz = 2; + break; + case ENTupleColumnType.kUInt16: + this.func = function(obj) { + obj[this.name] = this.view.getUint16(this.o, LITTLE_ENDIAN); + this.o += 2; + }; + this.sz = 2; + break; + case ENTupleColumnType.kInt8: + this.func = function(obj) { + obj[this.name] = this.view.getInt8(this.o++); + }; + this.sz = 1; + break; + case ENTupleColumnType.kUInt8: + case ENTupleColumnType.kByte: + this.func = function(obj) { + obj[this.name] = this.view.getUint8(this.o++); + }; + this.sz = 1; + break; + case ENTupleColumnType.kChar: + if (item0) { + this.namecnt = item0.name; + this.func = function(obj) { + const len = obj[this.namecnt]; + let s = ''; + for (let i = 0; i < len; ++i) + s += String.fromCharCode(this.view.getInt8(this.o++)); + obj[this.name] = s; + }; + this.shift = function() { + console.log('not implemented for string'); + }; + } else { + this.func = function(obj) { + obj[this.name] = String.fromCharCode(this.view.getInt8(this.o++)); + }; + this.sz = 1; + } + break; + default: + throw new Error(`Unsupported column type: ${this.coltype}`); + } + + if (this.sz && !this.shift) { + this.shift = function(entries) { + this.o += entries * this.sz; + }; + } } - if (item.sz && !item.shift) { - item.shift = function(entries) { - this.o += entries * this.sz; - }; + async unzipBlob(blob, cluster_locations) { + const colEntry = cluster_locations[this.id], // Access column entry + page = colEntry.pages[this.page], // requested page + numElements = Number(page.numElements), + elementSize = this.column.bitsOnStorage / 8; + + let expectedSize = numElements * elementSize; + // Special handling for boolean fields + if (this.coltype === ENTupleColumnType.kBit) + expectedSize = Math.ceil(numElements / 8); + + // Check if data is compressed + if ((colEntry.compression === 0) || (blob.byteLength === expectedSize)) + return blob; // Uncompressed: use blob directly + + // Try decompression + return R__unzip(blob, expectedSize).then(result => { + return result || blob; // Fallback to original blob ?? + }).catch(err => { + throw new Error(`Failed to unzip page for column ${this.id}: ${err.message}`); + }); } -} -function reconstructBlob(blob, columnDescriptor) { - const originalColtype = columnDescriptor.coltype, - data = recontructUnsplitBuffer(blob, columnDescriptor); + reconstructBlob(rawblob) { + if (!(rawblob instanceof DataView)) + throw new Error(`Invalid blob type for column ${this.id}: ${Object.prototype.toString.call(rawblob)}`); - // Handle split index types - if (originalColtype === ENTupleColumnType.kSplitIndex32 || originalColtype === ENTupleColumnType.kSplitIndex64) - return DecodeDeltaIndex(data.blob, data.coltype).blob; + const originalColtype = this.column.coltype, + data = recontructUnsplitBuffer(rawblob, this.column); - // Handle Split Signed Int types - if (originalColtype === ENTupleColumnType.kSplitInt16 || originalColtype === ENTupleColumnType.kSplitInt32 || originalColtype === ENTupleColumnType.kSplitInt64) - return decodeZigzag(data.blob, data.coltype).blob; + let blob; + // Handle split index types + if (originalColtype === ENTupleColumnType.kSplitIndex32 || originalColtype === ENTupleColumnType.kSplitIndex64) + blob = DecodeDeltaIndex(data.blob, data.coltype).blob; + // Handle Split Signed Int types + else if (originalColtype === ENTupleColumnType.kSplitInt16 || originalColtype === ENTupleColumnType.kSplitInt32 || originalColtype === ENTupleColumnType.kSplitInt64) + blob = decodeZigzag(data.blob, data.coltype).blob; + else + blob = data.blob; + + this.view = new DataView(blob); + this.o = 0; + } - return data.blob; } + + async function rntupleProcess(rntuple, selector, args) { const handle = { rntuple, // keep rntuple reference @@ -1289,40 +1338,15 @@ async function rntupleProcess(rntuple, selector, args) { return rntuple.$file.readBuffer(dataToRead).then(blobsRaw => { const blobs = Array.isArray(blobsRaw) ? blobsRaw : [blobsRaw], - unzipPromises = blobs.map((blob, idx) => { - const item = itemsToRead[idx], - colEntry = locations[item.id], // Access column entry - page = colEntry.pages[item.page], // requested page - numElements = Number(page.numElements), - elementSize = item.column.bitsOnStorage / 8; - - let expectedSize = numElements * elementSize; - // Special handling for boolean fields - if (item.coltype === ENTupleColumnType.kBit) - expectedSize = Math.ceil(numElements / 8); - - // Check if data is compressed - if ((colEntry.compression === 0) || (blob.byteLength === expectedSize)) - return Promise.resolve(blob); // Uncompressed: use blob directly - - // Try decompression - return R__unzip(blob, expectedSize).then(result => { - return result || blob; // Fallback to original blob ?? - }).catch(err => { - throw new Error(`Failed to unzip page ${idx}: ${err.message}`); - }); - }); + unzipPromises = blobs.map((blob, idx) => itemsToRead[idx].unzipBlob(blob, locations)); return Promise.all(unzipPromises).then(unzipBlobs => { - for (let i = 0; i < unzipBlobs.length; ++i) { - const rawblob = unzipBlobs[i], - item = itemsToRead[i]; - // Ensure blob is a DataView - if (!(rawblob instanceof DataView)) - throw new Error(`Invalid blob type for page ${i}: ${Object.prototype.toString.call(rawblob)}`); - - item.view = new DataView(reconstructBlob(rawblob, item.column)); - item.o = 0; + for (let idx = 0; idx < unzipBlobs.length; ++idx) { + const rawblob = unzipBlobs[idx], + item = itemsToRead[idx]; + + item.reconstructBlob(rawblob); + if (item.e0 > handle.current_entry) item.shift(handle.current_entry - item.e0); // FIXME - string will not work this way } @@ -1332,7 +1356,7 @@ async function rntupleProcess(rntuple, selector, args) { while (hasData) { for (let i = 0; i < handle.arr.length; ++i) { const item = handle.arr[i]; - item.func(item.view, selector.tgtobj); + item.func(selector.tgtobj); if (++item.e0 >= item.e1) { delete item.view; // data is over hasData = false; @@ -1368,21 +1392,9 @@ async function rntupleProcess(rntuple, selector, args) { for (let k = 0; k < columns.length; ++k) { // TODO - make extra class for the item - const item = { - column: columns[k], - id: columns[k].index, - coltype: columns[k].coltype, - splittype: 0, - page: -1 // current page for the reading - }; - - // special handling of split types - if ((item.coltype >= ENTupleColumnType.kSplitInt16) && (item.coltype <= ENTupleColumnType.kSplitIndex64)) { - item.splittype = item.coltype; - item.coltype -= (ENTupleColumnType.kSplitInt16 - ENTupleColumnType.kInt16); - } + const item = new ReaderItem(columns[k]); - assignReadFunc(item, item0); + item.assignReadFunc(item0); item.name = selector.nameOfBranch(i); // target object name From 197bdf017c8601c8a3c84d77ecc395a906987533 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Mon, 9 Feb 2026 13:18:19 +0100 Subject: [PATCH 09/17] [rntuple] try to decode std::string --- modules/rntuple.mjs | 119 +++++++++++++++++++++++++++----------------- 1 file changed, 72 insertions(+), 47 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index ed64d8d39..d9c774714 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1091,12 +1091,14 @@ async function readNextCluster(rntuple, selector) { class ReaderItem { - constructor(column) { + constructor(column, name) { this.column = column; this.id = column.index; this.coltype = column.coltype; this.splittype = 0; this.page = -1; // current page for the reading + this.name = name; + this.simple = true; // simple type, no index, no refs // special handling of split types if ((this.coltype >= ENTupleColumnType.kSplitInt16) && (this.coltype <= ENTupleColumnType.kSplitIndex64)) { @@ -1108,6 +1110,7 @@ class ReaderItem { assignReadFunc(item0) { switch (this.coltype) { case ENTupleColumnType.kBit: { + this.simple = false; this.func = function(obj) { console.log('bit reading not properly implemented'); obj[this.name] = false; @@ -1191,9 +1194,11 @@ class ReaderItem { break; case ENTupleColumnType.kChar: if (item0) { - this.namecnt = item0.name; + this.simple = item0.simple = false; // no shift can be used + this.namecnt = item0.name = `__${this.name}__cnt__`; this.func = function(obj) { - const len = obj[this.namecnt]; + const len = Number(obj[this.namecnt]); + console.log('reading len', len); let s = ''; for (let i = 0; i < len; ++i) s += String.fromCharCode(this.view.getInt8(this.o++)); @@ -1250,25 +1255,27 @@ class ReaderItem { const originalColtype = this.column.coltype, data = recontructUnsplitBuffer(rawblob, this.column); - let blob; // Handle split index types - if (originalColtype === ENTupleColumnType.kSplitIndex32 || originalColtype === ENTupleColumnType.kSplitIndex64) - blob = DecodeDeltaIndex(data.blob, data.coltype).blob; + if (originalColtype === ENTupleColumnType.kSplitIndex32 || originalColtype === ENTupleColumnType.kSplitIndex64) { + console.log(this.name, 'split first'); + this.view = new DataView(DecodeDeltaIndex(data.blob, data.coltype).blob.buffer); // Handle Split Signed Int types - else if (originalColtype === ENTupleColumnType.kSplitInt16 || originalColtype === ENTupleColumnType.kSplitInt32 || originalColtype === ENTupleColumnType.kSplitInt64) - blob = decodeZigzag(data.blob, data.coltype).blob; - else - blob = data.blob; + } else if (originalColtype === ENTupleColumnType.kSplitInt16 || originalColtype === ENTupleColumnType.kSplitInt32 || originalColtype === ENTupleColumnType.kSplitInt64) { + console.log(this.name, 'split second'); + this.view = new DataView(decodeZigzag(data.blob, data.coltype).blob.buffer); + } else if (data.blob instanceof DataView) { + console.log(this.name, 'get data as DataView'); + this.view = data.blob; + } else { + console.log(this.name, 'get data as buffer'); + this.view = new DataView(data.blob); + } - this.view = new DataView(blob); this.o = 0; } } - - - async function rntupleProcess(rntuple, selector, args) { const handle = { rntuple, // keep rntuple reference @@ -1276,6 +1283,7 @@ async function rntupleProcess(rntuple, selector, args) { selector, // reference on selector arr: [], // list of special handles per columns, more than one column per field may exist curr: -1, // current entry ID + simple: true, // if all columns are simple data types which can be manipulated easily current_cluster: -1, // current cluster to process current_cluster_first_entry: 0, // first entry in current cluster current_entry: -1, // current processed entry @@ -1340,38 +1348,58 @@ async function rntupleProcess(rntuple, selector, args) { const blobs = Array.isArray(blobsRaw) ? blobsRaw : [blobsRaw], unzipPromises = blobs.map((blob, idx) => itemsToRead[idx].unzipBlob(blob, locations)); - return Promise.all(unzipPromises).then(unzipBlobs => { - for (let idx = 0; idx < unzipBlobs.length; ++idx) { - const rawblob = unzipBlobs[idx], - item = itemsToRead[idx]; - - item.reconstructBlob(rawblob); - - if (item.e0 > handle.current_entry) - item.shift(handle.current_entry - item.e0); // FIXME - string will not work this way + return Promise.all(unzipPromises); + }).then(unzipBlobs => { + let need_plain_skip = false; + for (let idx = 0; idx < unzipBlobs.length; ++idx) { + const rawblob = unzipBlobs[idx], + item = itemsToRead[idx]; + + item.reconstructBlob(rawblob); + + if (item.e0 > handle.current_entry) { + if (handle.simple) { + item.shift(handle.current_entry - item.e0); + item.e0 = handle.current_entry; + } else + need_plain_skip = true; } + } - let hasData = true; - - while (hasData) { - for (let i = 0; i < handle.arr.length; ++i) { - const item = handle.arr[i]; + // allign all collumns to the next processing event + while (need_plain_skip) { + let isany = false; + for (let i = 0; i < handle.arr.length; ++i) { + const item = handle.arr[i]; + if (item.e0 < handle.current_entry) { item.func(selector.tgtobj); - if (++item.e0 >= item.e1) { - delete item.view; // data is over - hasData = false; - } + isany = true; + item.e0++; } - selector.Process(handle.current_entry++); + } + need_plain_skip = isany; + } + + let hasData = true; - if (handle.current_entry >= handle.process_max) { - selector.Terminate(true); - return true; + while (hasData) { + for (let i = 0; i < handle.arr.length; ++i) { + const item = handle.arr[i]; + item.func(selector.tgtobj); + if (++item.e0 >= item.e1) { + delete item.view; // data is over + hasData = false; } } + selector.Process(handle.current_entry++); - return readNextPortion(); - }); + if (handle.current_entry >= handle.process_max) { + selector.Terminate(true); + return true; + } + } + + return readNextPortion(); }); } @@ -1391,21 +1419,18 @@ async function rntupleProcess(rntuple, selector, args) { let item0 = null; for (let k = 0; k < columns.length; ++k) { - // TODO - make extra class for the item - const item = new ReaderItem(columns[k]); + const item = new ReaderItem(columns[k], selector.nameOfBranch(i)); item.assignReadFunc(item0); - item.name = selector.nameOfBranch(i); // target object name - - // case when two columns read like for the std::string, - // but most probably for some other cases - if ((columns.length === 2) && (k === 0)) { - item.name = `___indx${i}`; // TODO - use not a target object in the future + // if there are two columns, first used as length + if (k === 0) item0 = item; - } handle.arr.push(item); + + if (!item.simple) + handle.simple = false; } } From 708e75bc41bed258d4f6ba0a1f101a201c170191 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Mon, 9 Feb 2026 17:16:36 +0100 Subject: [PATCH 10/17] [rntuple] read all pages from the cluster --- modules/rntuple.mjs | 271 +++++++++++++++++++++----------------------- 1 file changed, 130 insertions(+), 141 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index d9c774714..7e9204699 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1099,6 +1099,7 @@ class ReaderItem { this.page = -1; // current page for the reading this.name = name; this.simple = true; // simple type, no index, no refs + this.sz = 1; // special handling of split types if ((this.coltype >= ENTupleColumnType.kSplitInt16) && (this.coltype <= ENTupleColumnType.kSplitIndex64)) { @@ -1107,7 +1108,31 @@ class ReaderItem { } } - assignReadFunc(item0) { + init_o() { + this.o = 0; + this.view = this.views.shift(); + this.view_len = this.view.byteLength; + } + + shift_o(sz) { + this.o += sz; + while ((this.o >= this.view_len) && this.view_len) { + this.o -= this.view_len; + if (this.views.length) { + this.view = this.views.shift(); + this.view_len = this.view.byteLength; + } else { + this.view = null; + this.view_len = 0; + } + } + } + + shift(entries) { + this.shift_o(this.sz * entries); + } + + assignReadFunc() { switch (this.coltype) { case ENTupleColumnType.kBit: { this.simple = false; @@ -1124,14 +1149,14 @@ class ReaderItem { case ENTupleColumnType.kReal64: this.func = function(obj) { obj[this.name] = this.view.getFloat64(this.o, LITTLE_ENDIAN); - this.o += 8; + this.shift_o(8); }; this.sz = 8; break; case ENTupleColumnType.kReal32: this.func = function(obj) { obj[this.name] = this.view.getFloat32(this.o, LITTLE_ENDIAN); - this.o += 4; + this.shift_o(4); }; this.sz = 4; break; @@ -1139,14 +1164,14 @@ class ReaderItem { case ENTupleColumnType.kIndex64: this.func = function(obj) { obj[this.name] = this.view.getBigInt64(this.o, LITTLE_ENDIAN); - this.o += 8; + this.shift_o(8); }; this.sz = 8; break; case ENTupleColumnType.kUInt64: this.func = function(obj) { obj[this.name] = this.view.getBigUint64(this.o, LITTLE_ENDIAN); - this.o += 8; + this.shift_o(8); }; this.sz = 8; break; @@ -1154,80 +1179,107 @@ class ReaderItem { case ENTupleColumnType.kIndex32: this.func = function(obj) { obj[this.name] = this.view.getInt32(this.o, LITTLE_ENDIAN); - this.o += 4; + this.shift_o(4); }; this.sz = 4; break; case ENTupleColumnType.kUInt32: this.func = function(obj) { obj[this.name] = this.view.getUint32(this.o, LITTLE_ENDIAN); - this.o += 4; + this.shift_o(4); }; this.sz = 4; break; case ENTupleColumnType.kInt16: this.func = function(obj) { obj[this.name] = this.view.getInt16(this.o, LITTLE_ENDIAN); - this.o += 2; + this.shift_o(2); }; this.sz = 2; break; case ENTupleColumnType.kUInt16: this.func = function(obj) { obj[this.name] = this.view.getUint16(this.o, LITTLE_ENDIAN); - this.o += 2; + this.shift_o(2); }; this.sz = 2; break; case ENTupleColumnType.kInt8: this.func = function(obj) { - obj[this.name] = this.view.getInt8(this.o++); + obj[this.name] = this.view.getInt8(this.o); + this.shift_o(1); }; this.sz = 1; break; case ENTupleColumnType.kUInt8: case ENTupleColumnType.kByte: this.func = function(obj) { - obj[this.name] = this.view.getUint8(this.o++); + obj[this.name] = this.view.getUint8(this.o); + this.shift_o(1); }; this.sz = 1; break; case ENTupleColumnType.kChar: - if (item0) { - this.simple = item0.simple = false; // no shift can be used - this.namecnt = item0.name = `__${this.name}__cnt__`; - this.func = function(obj) { - const len = Number(obj[this.namecnt]); - console.log('reading len', len); - let s = ''; - for (let i = 0; i < len; ++i) - s += String.fromCharCode(this.view.getInt8(this.o++)); - obj[this.name] = s; - }; - this.shift = function() { - console.log('not implemented for string'); - }; - } else { - this.func = function(obj) { - obj[this.name] = String.fromCharCode(this.view.getInt8(this.o++)); - }; - this.sz = 1; - } + this.func = function(obj) { + obj[this.name] = String.fromCharCode(this.view.getInt8(this.o)); + this.shift_o(1); + }; + this.sz = 1; break; default: throw new Error(`Unsupported column type: ${this.coltype}`); } + } - if (this.sz && !this.shift) { - this.shift = function(entries) { - this.o += entries * this.sz; - }; + assignStringReader(item1) { + this.item1 = item1; + this.off0 = 0; + this.$tgt = {}; + + item1.func0 = item1.func; + item1.shift0 = item1.shift; + + // assign noop + item1.func = item1.shift = () => {}; + + this.func = function(tgtobj) { + this.item1.func0(this.$tgt); + const off = Number(this.$tgt[this.name]); + let len = off - this.off0, s = ''; + while (len-- > 0) { + s += String.fromCharCode(this.view.getInt8(this.o)); + this.shift_o(1); + } + tgtobj[this.name] = s; + this.off0 = off; + }; + + this.shift = function(entries) { + if (entries > 0) { + this.item1.shift0(entries); + this.item1.func0(this.$tgt); + this.off0 = Number(this.$tgt[this.name]); + this.shift_o(this.off0); + } + }; + } + + collectPages(cluster_locations, dataToRead, itemsToRead, pagesToRead) { + const pages = cluster_locations[this.id].pages; + for (let p = 0; p < pages.length; ++p) { + const locator = pages[p].locator; + itemsToRead.push(this); + dataToRead.push(Number(locator.offset), locator.size); + pagesToRead.push(pages[p]); } + this.views = []; + this.view = null; + this.view_len = 0; + this.o = 0; } - async unzipBlob(blob, cluster_locations) { + async unzipBlob(blob, cluster_locations, page) { const colEntry = cluster_locations[this.id], // Access column entry - page = colEntry.pages[this.page], // requested page numElements = Number(page.numElements), elementSize = this.column.bitsOnStorage / 8; @@ -1255,23 +1307,25 @@ class ReaderItem { const originalColtype = this.column.coltype, data = recontructUnsplitBuffer(rawblob, this.column); + let view; + // Handle split index types if (originalColtype === ENTupleColumnType.kSplitIndex32 || originalColtype === ENTupleColumnType.kSplitIndex64) { console.log(this.name, 'split first'); - this.view = new DataView(DecodeDeltaIndex(data.blob, data.coltype).blob.buffer); + view = new DataView(DecodeDeltaIndex(data.blob, data.coltype).blob.buffer); // Handle Split Signed Int types } else if (originalColtype === ENTupleColumnType.kSplitInt16 || originalColtype === ENTupleColumnType.kSplitInt32 || originalColtype === ENTupleColumnType.kSplitInt64) { console.log(this.name, 'split second'); - this.view = new DataView(decodeZigzag(data.blob, data.coltype).blob.buffer); + view = new DataView(decodeZigzag(data.blob, data.coltype).blob.buffer); } else if (data.blob instanceof DataView) { console.log(this.name, 'get data as DataView'); - this.view = data.blob; + view = data.blob; } else { console.log(this.name, 'get data as buffer'); - this.view = new DataView(data.blob); + view = new DataView(data.blob); } - this.o = 0; + this.views.push(view); } } @@ -1284,9 +1338,10 @@ async function rntupleProcess(rntuple, selector, args) { arr: [], // list of special handles per columns, more than one column per field may exist curr: -1, // current entry ID simple: true, // if all columns are simple data types which can be manipulated easily - current_cluster: -1, // current cluster to process + current_cluster: 0, // current cluster to process current_cluster_first_entry: 0, // first entry in current cluster - current_entry: -1, // current processed entry + current_cluster_last_entry: 0, // last entry in current cluster + current_entry: 0, // current processed entry simple_read: true, // all baskets in all used branches are in sync, process_arrays: false, // one can process all branches as arrays firstentry: 0, // first entry in the rntuple @@ -1294,16 +1349,9 @@ async function rntupleProcess(rntuple, selector, args) { }; function readNextPortion(inc_cluster) { - if (inc_cluster !== undefined) { - if (inc_cluster === 1) - handle.current_cluster_first_entry += rntuple.builder.clusterSummaries[handle.current_cluster].numEntries; - handle.current_cluster += inc_cluster; - handle.arr.forEach(item => { - item.page = -1; - item.e0 = handle.current_cluster_first_entry; - if (item.view) - throw new Error(`still data when processing column ${item.name}`); - }); + if (inc_cluster) { + handle.current_cluster++; + handle.current_cluster_first_entry = handle.current_cluster_last_entry; } const locations = rntuple.builder.pageLocations[handle.current_cluster]; @@ -1311,95 +1359,41 @@ async function rntupleProcess(rntuple, selector, args) { selector.Terminate(true); return selector; } - let end_of_cluster = 0; - const dataToRead = [], itemsToRead = []; - - // loop over all columns and request buffer - for (let i = 0; i < handle.arr.length; ++i) { - const item = handle.arr[i]; - - if (!item.view) { - const pages = locations[item.id].pages; - while (++item.page < pages.length) { - const page = pages[item.page]; - // if current entry inside the page - read buffer - if (handle.current_entry < item.e0 + Number(page.numElements)) - break; - item.e0 += Number(page.numElements); - } - if (item.page >= pages.length) { - end_of_cluster++; - item.page = -1; - } else { - item.e1 = item.e0 + Number(pages[item.page].numElements); - itemsToRead.push(item); - dataToRead.push(Number(pages[item.page].locator.offset), pages[item.page].locator.size); - } - } - } - if (end_of_cluster) { - if (end_of_cluster !== handle.arr.length) - throw new Error('Missmatch at the cluster boundary'); - return readNextPortion(1); - } + handle.current_cluster_last_entry = handle.current_cluster_first_entry + rntuple.builder.clusterSummaries[handle.current_cluster].numEntries; + + const dataToRead = [], itemsToRead = [], pagesToRead = []; + + // loop over all columns and request all pages + for (let i = 0; i < handle.arr.length; ++i) + handle.arr[i].collectPages(locations, dataToRead, itemsToRead, pagesToRead); return rntuple.$file.readBuffer(dataToRead).then(blobsRaw => { const blobs = Array.isArray(blobsRaw) ? blobsRaw : [blobsRaw], - unzipPromises = blobs.map((blob, idx) => itemsToRead[idx].unzipBlob(blob, locations)); - + unzipPromises = blobs.map((blob, idx) => itemsToRead[idx].unzipBlob(blob, locations, pagesToRead[idx])); return Promise.all(unzipPromises); }).then(unzipBlobs => { - let need_plain_skip = false; - for (let idx = 0; idx < unzipBlobs.length; ++idx) { - const rawblob = unzipBlobs[idx], - item = itemsToRead[idx]; - - item.reconstructBlob(rawblob); - - if (item.e0 > handle.current_entry) { - if (handle.simple) { - item.shift(handle.current_entry - item.e0); - item.e0 = handle.current_entry; - } else - need_plain_skip = true; - } - } + unzipBlobs.map((rawblob, idx) => itemsToRead[idx].reconstructBlob(rawblob)); - // allign all collumns to the next processing event - while (need_plain_skip) { - let isany = false; - for (let i = 0; i < handle.arr.length; ++i) { - const item = handle.arr[i]; - if (item.e0 < handle.current_entry) { - item.func(selector.tgtobj); - isany = true; - item.e0++; - } - } - need_plain_skip = isany; + for (let indx = 0; indx < handle.arr.length; ++indx) { + handle.arr[indx].init_o(); + if (handle.current_entry > handle.current_cluster_first_entry) + handle.arr[indx].shift(handle.current_entry - handle.current_cluster_first_entry); } - let hasData = true; + while (handle.current_entry < handle.current_cluster_last_entry) { + for (let i = 0; i < handle.arr.length; ++i) + handle.arr[i].func(selector.tgtobj); - while (hasData) { - for (let i = 0; i < handle.arr.length; ++i) { - const item = handle.arr[i]; - item.func(selector.tgtobj); - if (++item.e0 >= item.e1) { - delete item.view; // data is over - hasData = false; - } - } selector.Process(handle.current_entry++); if (handle.current_entry >= handle.process_max) { selector.Terminate(true); - return true; + return selector; } } - return readNextPortion(); + return readNextPortion(true); }); } @@ -1412,25 +1406,20 @@ async function rntupleProcess(rntuple, selector, args) { if (!name) throw new Error(`Not able to extract name for field ${i}`); + // TODO: fieldToColumns can be out out const columns = rntuple.fieldToColumns[name]; if (!columns) throw new Error(`No columns found for field '${name}' in RNTuple`); - let item0 = null; - - for (let k = 0; k < columns.length; ++k) { - const item = new ReaderItem(columns[k], selector.nameOfBranch(i)); - - item.assignReadFunc(item0); - - // if there are two columns, first used as length - if (k === 0) - item0 = item; - - handle.arr.push(item); + const tgtname = selector.nameOfBranch(i), + item = new ReaderItem(columns[0], tgtname); + item.assignReadFunc(); + handle.arr.push(item); - if (!item.simple) - handle.simple = false; + if (columns.length === 2) { + const item2 = new ReaderItem(columns[1], tgtname); + item2.assignStringReader(item); + handle.arr.push(item2); } } @@ -1470,7 +1459,7 @@ async function rntupleProcess(rntuple, selector, args) { selector.Begin(rntuple); - return readNextPortion(0); + return readNextPortion(); }).then(() => selector); } From 0806f49e60c8f531dce0e99f486957fa0f1539f6 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Tue, 10 Feb 2026 12:18:12 +0100 Subject: [PATCH 11/17] [rntuple] implement new bits reading Also correct shift for the members without size value --- modules/rntuple.mjs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 7e9204699..d2cb0878d 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1110,6 +1110,7 @@ class ReaderItem { init_o() { this.o = 0; + this.o2 = 0; // for bit count this.view = this.views.shift(); this.view_len = this.view.byteLength; } @@ -1129,7 +1130,12 @@ class ReaderItem { } shift(entries) { - this.shift_o(this.sz * entries); + if (this.sz) + this.shift_o(this.sz * entries); + else { + while (entries-- > 0) + this.func({}); + } } assignReadFunc() { @@ -1137,12 +1143,13 @@ class ReaderItem { case ENTupleColumnType.kBit: { this.simple = false; this.func = function(obj) { - console.log('bit reading not properly implemented'); - obj[this.name] = false; - this.o++; - }; - this.shift = function() { - console.log('bit shift not implemented'); + if (this.o2 === 0) + this.byte = this.view.getUint8(this.o); + obj[this.name] = ((this.byte >>> this.o2++) & 1) === 1; + if (this.o2 === 8) { + this.o2 = 0; + this.shift_o(1); + } }; break; } From 8f43a5b529677e67e2d1454f71c4690f115bd1fd Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Tue, 10 Feb 2026 12:29:50 +0100 Subject: [PATCH 12/17] [rntuple] remove log output --- modules/rntuple.mjs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index d2cb0878d..96d7a9c2c 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1317,20 +1317,15 @@ class ReaderItem { let view; // Handle split index types - if (originalColtype === ENTupleColumnType.kSplitIndex32 || originalColtype === ENTupleColumnType.kSplitIndex64) { - console.log(this.name, 'split first'); + if (originalColtype === ENTupleColumnType.kSplitIndex32 || originalColtype === ENTupleColumnType.kSplitIndex64) view = new DataView(DecodeDeltaIndex(data.blob, data.coltype).blob.buffer); // Handle Split Signed Int types - } else if (originalColtype === ENTupleColumnType.kSplitInt16 || originalColtype === ENTupleColumnType.kSplitInt32 || originalColtype === ENTupleColumnType.kSplitInt64) { - console.log(this.name, 'split second'); + else if (originalColtype === ENTupleColumnType.kSplitInt16 || originalColtype === ENTupleColumnType.kSplitInt32 || originalColtype === ENTupleColumnType.kSplitInt64) view = new DataView(decodeZigzag(data.blob, data.coltype).blob.buffer); - } else if (data.blob instanceof DataView) { - console.log(this.name, 'get data as DataView'); + else if (data.blob instanceof DataView) view = data.blob; - } else { - console.log(this.name, 'get data as buffer'); + else view = new DataView(data.blob); - } this.views.push(view); } From a13d980f2fbae013acf0fe60d8695732c28cab4f Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Tue, 10 Feb 2026 12:45:34 +0100 Subject: [PATCH 13/17] [rntuple] do not use BigInt for a time been in readout Convert into Number to simplify processing. Later - once TTree::Draw supports BigInt - one can revert this changes --- modules/rntuple.mjs | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 96d7a9c2c..c6be87f4c 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1170,14 +1170,16 @@ class ReaderItem { case ENTupleColumnType.kInt64: case ENTupleColumnType.kIndex64: this.func = function(obj) { - obj[this.name] = this.view.getBigInt64(this.o, LITTLE_ENDIAN); + // FIXME: let process BigInt in the TTree::Draw + obj[this.name] = Number(this.view.getBigInt64(this.o, LITTLE_ENDIAN)); this.shift_o(8); }; this.sz = 8; break; case ENTupleColumnType.kUInt64: this.func = function(obj) { - obj[this.name] = this.view.getBigUint64(this.o, LITTLE_ENDIAN); + // FIXME: let process BigInt in the TTree::Draw + obj[this.name] = Number(this.view.getBigUint64(this.o, LITTLE_ENDIAN)); this.shift_o(8); }; this.sz = 8; @@ -1502,30 +1504,6 @@ class TDrawSelectorTuple extends TDrawSelector { /** @summary Returns true if field can be used as array */ isArrayBranch(/* tuple, br */) { return false; } - /** @summary Begin of processing */ - Begin(tree) { - super.Begin(tree); - this._first_entry = true; - this._has_bigint = false; - } - - /** @summary Process entry */ - Process(entry) { - if (this._first_entry || this._has_bigint) { - for (let n = 0; n < this.numBranches(); ++n) { - const name = this.nameOfBranch(n); - if (typeof this.tgtobj[name] === 'bigint') { - this.tgtobj[name] = Number(this.tgtobj[name]); - this._has_bigint = true; - } - } - } - - this._first_entry = false; - - super.Process(entry); - } - } // class TDrawSelectorTuple From ecb87f93ebca03f5217d328b86e20f0d1436f9e1 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Tue, 10 Feb 2026 13:47:54 +0100 Subject: [PATCH 14/17] [rntuple] try to read pages only for selected entries --- modules/rntuple.mjs | 85 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 20 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index c6be87f4c..43c10883d 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1099,7 +1099,7 @@ class ReaderItem { this.page = -1; // current page for the reading this.name = name; this.simple = true; // simple type, no index, no refs - this.sz = 1; + this.sz = 0; // special handling of split types if ((this.coltype >= ENTupleColumnType.kSplitInt16) && (this.coltype <= ENTupleColumnType.kSplitIndex64)) { @@ -1138,6 +1138,9 @@ class ReaderItem { } } + /** @summary Simple column which fixed element size */ + is_simple() { return this.sz > 0; } + assignReadFunc() { switch (this.coltype) { case ENTupleColumnType.kBit: { @@ -1273,23 +1276,40 @@ class ReaderItem { }; } - collectPages(cluster_locations, dataToRead, itemsToRead, pagesToRead) { + collectPages(cluster_locations, dataToRead, itemsToRead, pagesToRead, emin, emax, elist) { const pages = cluster_locations[this.id].pages; - for (let p = 0; p < pages.length; ++p) { - const locator = pages[p].locator; - itemsToRead.push(this); - dataToRead.push(Number(locator.offset), locator.size); - pagesToRead.push(pages[p]); - } - this.views = []; + + this.views = new Array(pages.length); this.view = null; this.view_len = 0; this.o = 0; + this.o2 = 0; + + let e0 = 0; + for (let p = 0; p < pages.length; ++p) { + const page = pages[p], e1 = e0 + Number(page.numElements); + + let is_entries_inside = false; + if (elist?.length) + elist.forEach(e => { is_entries_inside ||= (e >= e0) && (e < e1); }); + else + is_entries_inside = ((e0 >= emin) && (e0 < emax)) || ((e1 > emin) && (e1 <= emax)); + + if (!this.is_simple() || is_entries_inside) { + itemsToRead.push(this); + dataToRead.push(Number(page.locator.offset), page.locator.size); + pagesToRead.push(p); + this.views[p] = null; // placeholder, filled after request + } else + this.views[p] = { byteLength: this.sz * Number(page.numElements) }; // dummy entry only to allow proper navigation + + e0 = e1; + } } - async unzipBlob(blob, cluster_locations, page) { + async unzipBlob(blob, cluster_locations, page_indx) { const colEntry = cluster_locations[this.id], // Access column entry - numElements = Number(page.numElements), + numElements = Number(colEntry.pages[page_indx].numElements), elementSize = this.column.bitsOnStorage / 8; let expectedSize = numElements * elementSize; @@ -1305,11 +1325,11 @@ class ReaderItem { return R__unzip(blob, expectedSize).then(result => { return result || blob; // Fallback to original blob ?? }).catch(err => { - throw new Error(`Failed to unzip page for column ${this.id}: ${err.message}`); + throw new Error(`Failed to unzip page ${page_indx} for column ${this.id}: ${err.message}`); }); } - reconstructBlob(rawblob) { + reconstructBlob(rawblob, page_indx) { if (!(rawblob instanceof DataView)) throw new Error(`Invalid blob type for column ${this.id}: ${Object.prototype.toString.call(rawblob)}`); @@ -1329,7 +1349,7 @@ class ReaderItem { else view = new DataView(data.blob); - this.views.push(view); + this.views[page_indx] = view; } } @@ -1364,20 +1384,38 @@ async function rntupleProcess(rntuple, selector, args) { return selector; } - handle.current_cluster_last_entry = handle.current_cluster_first_entry + rntuple.builder.clusterSummaries[handle.current_cluster].numEntries; + const numClusterEntries = rntuple.builder.clusterSummaries[handle.current_cluster].numEntries; + + handle.current_cluster_last_entry = handle.current_cluster_first_entry + numClusterEntries; - const dataToRead = [], itemsToRead = [], pagesToRead = []; + // calculate entries which can be extracted from the cluster + let emin, emax; + const dataToRead = [], itemsToRead = [], pagesToRead = [], elist = []; - // loop over all columns and request all pages + if (handle.process_entries) { + let i = handle.process_entries_indx; + while ((i < handle.process_entries.length) && (handle.process_entries[i] < handle.current_cluster_last_entry)) + elist.push(handle.process_entries[i++] - handle.current_cluster_first_entry); + emin = elist[0]; + emax = elist[elist.length - 1]; + } else { + emin = handle.current_entry - handle.current_cluster_first_entry; + emax = Math.min(numClusterEntries, handle.process_max - handle.current_cluster_first_entry); + } + + + console.log(emin, emax, elist); + + // loop over all columns and request required pages for (let i = 0; i < handle.arr.length; ++i) - handle.arr[i].collectPages(locations, dataToRead, itemsToRead, pagesToRead); + handle.arr[i].collectPages(locations, dataToRead, itemsToRead, pagesToRead, emin, emax, elist); return rntuple.$file.readBuffer(dataToRead).then(blobsRaw => { const blobs = Array.isArray(blobsRaw) ? blobsRaw : [blobsRaw], unzipPromises = blobs.map((blob, idx) => itemsToRead[idx].unzipBlob(blob, locations, pagesToRead[idx])); return Promise.all(unzipPromises); }).then(unzipBlobs => { - unzipBlobs.map((rawblob, idx) => itemsToRead[idx].reconstructBlob(rawblob)); + unzipBlobs.map((rawblob, idx) => itemsToRead[idx].reconstructBlob(rawblob, pagesToRead[idx])); for (let indx = 0; indx < handle.arr.length; ++indx) { handle.arr[indx].init_o(); @@ -1437,6 +1475,14 @@ async function rntupleProcess(rntuple, selector, args) { handle.process_min = handle.firstentry; handle.process_max = handle.lastentry; + if (args.elist) { + args.firstentry = args.elist.at(0); + args.numentries = args.elist.at(-1) - args.elist.at(0) + 1; + handle.process_entries = args.elist; + handle.process_entries_indx = 0; + handle.process_arrays = false; // do not use arrays process for selected entries + } + if (Number.isInteger(args.firstentry) && (args.firstentry > handle.firstentry) && (args.firstentry < handle.lastentry)) handle.process_min = args.firstentry; @@ -1444,7 +1490,6 @@ async function rntupleProcess(rntuple, selector, args) { handle.process_max = Math.min(handle.process_max, handle.process_min + args.numentries); // first check from which cluster one should start - for (let indx = 0, emin = 0; indx < rntuple.builder.clusterSummaries.length; ++indx) { const summary = rntuple.builder.clusterSummaries[indx], emax = emin + summary.numEntries; From 6cab0da6a9292bc58ce1fc77239bcbdb5a1b3ebb Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Tue, 10 Feb 2026 15:59:46 +0100 Subject: [PATCH 15/17] [rntuple] use margin for entries selection for offset column It requires to read previous offset to get start/stop offset for string --- modules/rntuple.mjs | 56 ++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 43c10883d..45c5dda55 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1098,7 +1098,6 @@ class ReaderItem { this.splittype = 0; this.page = -1; // current page for the reading this.name = name; - this.simple = true; // simple type, no index, no refs this.sz = 0; // special handling of split types @@ -1108,6 +1107,12 @@ class ReaderItem { } } + cleanup() { + this.views = null; + this.view = null; + this.view_len = 0; + } + init_o() { this.o = 0; this.o2 = 0; // for bit count @@ -1144,7 +1149,6 @@ class ReaderItem { assignReadFunc() { switch (this.coltype) { case ENTupleColumnType.kBit: { - this.simple = false; this.func = function(obj) { if (this.o2 === 0) this.byte = this.view.getUint8(this.o); @@ -1243,6 +1247,9 @@ class ReaderItem { } } + /** @summary identify if this item used as offset for std::string or similar */ + is_offset_item() { return this.item1; } + assignStringReader(item1) { this.item1 = item1; this.off0 = 0; @@ -1280,20 +1287,18 @@ class ReaderItem { const pages = cluster_locations[this.id].pages; this.views = new Array(pages.length); - this.view = null; - this.view_len = 0; - this.o = 0; - this.o2 = 0; let e0 = 0; for (let p = 0; p < pages.length; ++p) { - const page = pages[p], e1 = e0 + Number(page.numElements); + const page = pages[p], + e1 = e0 + Number(page.numElements), + margin = this.is_offset_item() ? 1 : 0; // offset for previous entry has to be read as well let is_entries_inside = false; if (elist?.length) - elist.forEach(e => { is_entries_inside ||= (e >= e0) && (e < e1); }); + elist.forEach(e => { is_entries_inside ||= (e >= e0) && (e - margin < e1); }); else - is_entries_inside = ((e0 >= emin) && (e0 < emax)) || ((e1 > emin) && (e1 <= emax)); + is_entries_inside = ((e0 >= emin - margin) && (e0 < emax)) || ((e1 > emin - margin) && (e1 <= emax)); if (!this.is_simple() || is_entries_inside) { itemsToRead.push(this); @@ -1360,13 +1365,10 @@ async function rntupleProcess(rntuple, selector, args) { file: rntuple.$file, // keep file reference selector, // reference on selector arr: [], // list of special handles per columns, more than one column per field may exist - curr: -1, // current entry ID - simple: true, // if all columns are simple data types which can be manipulated easily current_cluster: 0, // current cluster to process current_cluster_first_entry: 0, // first entry in current cluster current_cluster_last_entry: 0, // last entry in current cluster current_entry: 0, // current processed entry - simple_read: true, // all baskets in all used branches are in sync, process_arrays: false, // one can process all branches as arrays firstentry: 0, // first entry in the rntuple lastentry: 0 // last entry in the rntuple @@ -1403,9 +1405,6 @@ async function rntupleProcess(rntuple, selector, args) { emax = Math.min(numClusterEntries, handle.process_max - handle.current_cluster_first_entry); } - - console.log(emin, emax, elist); - // loop over all columns and request required pages for (let i = 0; i < handle.arr.length; ++i) handle.arr[i].collectPages(locations, dataToRead, itemsToRead, pagesToRead, emin, emax, elist); @@ -1417,19 +1416,30 @@ async function rntupleProcess(rntuple, selector, args) { }).then(unzipBlobs => { unzipBlobs.map((rawblob, idx) => itemsToRead[idx].reconstructBlob(rawblob, pagesToRead[idx])); - for (let indx = 0; indx < handle.arr.length; ++indx) { + for (let indx = 0; indx < handle.arr.length; ++indx) handle.arr[indx].init_o(); - if (handle.current_entry > handle.current_cluster_first_entry) - handle.arr[indx].shift(handle.current_entry - handle.current_cluster_first_entry); - } + + let skip_entries = handle.current_entry - handle.current_cluster_first_entry; while (handle.current_entry < handle.current_cluster_last_entry) { - for (let i = 0; i < handle.arr.length; ++i) + for (let i = 0; i < handle.arr.length; ++i) { + if (skip_entries > 0) + handle.arr[i].shift(skip_entries); handle.arr[i].func(selector.tgtobj); + } + skip_entries = 0; - selector.Process(handle.current_entry++); + selector.Process(handle.current_entry); - if (handle.current_entry >= handle.process_max) { + if (handle.process_entries) { + if (++handle.process_entries_indx >= handle.process_entries.length) { + selector.Terminate(true); + return selector; + } + const prev_entry = handle.current_entry; + handle.current_entry = handle.process_entries[handle.process_entries_indx]; + skip_entries = handle.current_entry - prev_entry - 1; + } else if (++handle.current_entry >= handle.process_max) { selector.Terminate(true); return selector; } @@ -1504,7 +1514,7 @@ async function rntupleProcess(rntuple, selector, args) { if (handle.current_cluster < 0) throw new Error(`Not able to find cluster for entry ${handle.process_min} in the RNtuple`); - handle.current_entry = handle.staged_now = handle.process_min; + handle.current_entry = handle.process_min; selector.Begin(rntuple); From 87d930251cfba77882dc3aea1d752a4ca5229232 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Tue, 10 Feb 2026 16:11:40 +0100 Subject: [PATCH 16/17] [rntuple] remove old implementation --- modules/rntuple.mjs | 141 -------------------------------------------- 1 file changed, 141 deletions(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 45c5dda55..12385689b 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -963,132 +963,6 @@ function getSelectorFieldName(selector, i) { return isStr(br) ? br : br?.fieldName; } -// Read and process the next data cluster from the RNTuple -async function readNextCluster(rntuple, selector) { - const builder = rntuple.builder; - - // Add validation - if (!builder.clusterSummaries) - throw new Error('No cluster summaries available - possibly incomplete file reading'); - - const clusterIndex = selector.currentCluster, - clusterSummary = builder.clusterSummaries[clusterIndex], - // Gather all pages for this cluster from selected fields only - pages = [], - // Collect only selected field names from selector - selectedFields = []; - - if (!clusterSummary) { - selector.Terminate(clusterIndex > 0); - return false; - } - - for (let i = 0; i < selector.numBranches(); ++i) - selectedFields.push(getSelectorFieldName(selector, i)); - - // For each selected field, collect its columns' pages - for (const fieldName of selectedFields) { - const columns = rntuple.fieldToColumns[fieldName]; - if (!columns) - throw new Error(`Selected field '${fieldName}' not found in RNTuple`); - - for (const colDesc of columns) { - const colEntry = builder.pageLocations[clusterIndex]?.[colDesc.index]; - - // When the data is missing or broken - if (!colEntry || !colEntry.pages) - throw new Error(`No pages for column ${colDesc.index} in cluster ${clusterIndex}`); - - for (const page of colEntry.pages) - pages.push({ page, colDesc, fieldName }); - } - } - - selector.currentCluster++; - - // Early exit if no pages to read (i.e., no selected fields matched) - if (pages.length === 0) { - selector.Terminate(false); - return false; - } - - // Build flat array of [offset, size, offset, size, ...] to read pages - const dataToRead = pages.flatMap(p => - [Number(p.page.locator.offset), Number(p.page.locator.size)] - ); - - return rntuple.$file.readBuffer(dataToRead).then(blobsRaw => { - const blobs = Array.isArray(blobsRaw) ? blobsRaw : [blobsRaw], - unzipPromises = blobs.map((blob, idx) => { - const { page, colDesc } = pages[idx], - colEntry = builder.pageLocations[clusterIndex][colDesc.index], // Access column entry - numElements = Number(page.numElements), - elementSize = colDesc.bitsOnStorage / 8; - - // Check if data is compressed - if (colEntry.compression === 0) - return Promise.resolve(blob); // Uncompressed: use blob directly - const expectedSize = numElements * elementSize; - - // Special handling for boolean fields - if (colDesc.coltype === ENTupleColumnType.kBit) { - const expectedBoolSize = Math.ceil(numElements / 8); - if (blob.byteLength === expectedBoolSize) - return Promise.resolve(blob); - // Try decompression but catch errors for boolean fields - return R__unzip(blob, expectedBoolSize).catch(err => { - throw new Error(`Failed to unzip boolean page ${idx}: ${err.message}`); - }); - } - - // If the blob is already the expected size, treat as uncompressed - if (blob.byteLength === expectedSize) - return Promise.resolve(blob); - - // Try decompression - return R__unzip(blob, expectedSize).then(result => { - if (!result) - return blob; // Fallback to original blob - return result; - }).catch(err => { - throw new Error(`Failed to unzip page ${idx}: ${err.message}`); - }); - }); - - return Promise.all(unzipPromises).then(unzipBlobs => { - rntuple._clusterData = {}; // store deserialized data per column index - - for (let i = 0; i < unzipBlobs.length; ++i) { - const blob = unzipBlobs[i]; - // Ensure blob is a DataView - if (!(blob instanceof DataView)) - throw new Error(`Invalid blob type for page ${i}: ${Object.prototype.toString.call(blob)}`); - const colDesc = pages[i].colDesc, - values = builder.deserializePage(blob, colDesc, pages[i].page); - - // Support multiple representations (e.g., string fields with offsets + payload) - if (!rntuple._clusterData[colDesc.index]) - rntuple._clusterData[colDesc.index] = []; - - rntuple._clusterData[colDesc.index].push(values); - } - - const numEntries = clusterSummary.numEntries; - for (let i = 0; i < numEntries; ++i) { - for (let b = 0; b < selector.numBranches(); ++b) { - const fieldName = getSelectorFieldName(selector, b), - tgtName = selector.nameOfBranch(b); - - selector.tgtobj[tgtName] = readEntry(rntuple, fieldName, clusterIndex, i); - } - selector.Process(selector.currentEntry++); - } - - return readNextCluster(rntuple, selector); - }); - }); -} - class ReaderItem { constructor(column, name) { @@ -1523,21 +1397,6 @@ async function rntupleProcess(rntuple, selector, args) { } -// TODO args can later be used to filter fields, limit entries, etc. -// Create reader and deserialize doubles from the buffer -function rntupleProcessOld(rntuple, selector, args) { - selector.Begin(); - return readHeaderFooter(rntuple).then(res => { - if (!res) { - selector.Terminate(false); - return selector; - } - selector.currentCluster = 0; - selector.currentEntry = 0; - return readNextCluster(rntuple, selector, args); - }).then(() => selector); -} - class TDrawSelectorTuple extends TDrawSelector { /** @summary Return total number of entries From 72bd4a631cd4c163395b05b31f78c07166356b96 Mon Sep 17 00:00:00 2001 From: Sergey Linev Date: Tue, 10 Feb 2026 16:16:56 +0100 Subject: [PATCH 17/17] [rntuple] add default value for args --- modules/rntuple.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs index 12385689b..7ebac27ed 100644 --- a/modules/rntuple.mjs +++ b/modules/rntuple.mjs @@ -1233,7 +1233,7 @@ class ReaderItem { } -async function rntupleProcess(rntuple, selector, args) { +async function rntupleProcess(rntuple, selector, args = {}) { const handle = { rntuple, // keep rntuple reference file: rntuple.$file, // keep file reference