Skip to content

Commit c779339

Browse files
committed
add comment that explains arrow IPC alignment issue
1 parent 96c71cb commit c779339

File tree

1 file changed

+5
-0
lines changed
  • datafusion/physical-plan/src/spill

1 file changed

+5
-0
lines changed

datafusion/physical-plan/src/spill/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,11 @@ impl IPCStreamWriter {
308308
})?;
309309

310310
let metadata_version = MetadataVersion::V5;
311+
// Depending on the schema, some array types such as StringViewArray require larger (16 byte in this case) alignment.
312+
// If the actual buffer layout after IPC read does not satisfy the alignment requirement,
313+
// Arrow ArrayBuilder will copy the buffer into a newly allocated, properly aligned buffer.
314+
// This copying may lead to memory blowup during IPC read due to duplicated buffers.
315+
// To avoid this, we compute the maximum required alignment based on the schema and configure the IPCStreamWriter accordingly.
311316
let alignment = get_max_alignment_for_schema(schema);
312317
let mut write_options =
313318
IpcWriteOptions::try_new(alignment, false, metadata_version)?;

0 commit comments

Comments
 (0)