Commit 9008bd7

test: add test for Arrow C stream capsule release behavior
1 parent 7ee5924 commit 9008bd7

2 files changed: 33 additions & 29 deletions

python/tests/test_dataframe.py

Lines changed: 31 additions & 0 deletions
@@ -1698,6 +1698,37 @@ def test_arrow_c_stream_schema_mismatch(fail_collect):
         df.__arrow_c_stream__(bad_capsule)


+def test_arrow_c_stream_capsule_released(ctx):
+    df = ctx.from_pydict({"a": [1]})
+
+    capsule = df.__arrow_c_stream__()
+
+    get_destructor = ctypes.pythonapi.PyCapsule_GetDestructor
+    get_destructor.restype = ctypes.c_void_p
+    get_destructor.argtypes = [ctypes.py_object]
+
+    # The capsule should not have a registered destructor
+    assert not get_destructor(capsule)
+
+    get_ptr = ctypes.pythonapi.PyCapsule_GetPointer
+    get_ptr.restype = ctypes.c_void_p
+    get_ptr.argtypes = [ctypes.py_object, ctypes.c_char_p]
+    pyerr_clear = ctypes.pythonapi.PyErr_Clear
+    pyerr_clear.restype = None
+    pyerr_clear.argtypes = []
+
+    # Pointer is valid before importing to PyArrow
+    assert get_ptr(capsule, b"arrow_array_stream")
+    pyerr_clear()
+
+    reader = pa.RecordBatchReader._import_from_c_capsule(capsule)
+    reader.read_all()
+
+    # After import the capsule no longer exposes the pointer
+    with pytest.raises(ValueError):
+        get_ptr(capsule, b"arrow_array_stream")
+    pyerr_clear()
+
+
 def test_to_pylist(df):
     # Convert datafusion dataframe to Python list
     pylist = df.to_pylist()
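
A note on the ctypes plumbing in the test above: ctypes.pythonapi is a PyDLL, so the Python error indicator is checked after every call, and a failing PyCapsule_GetPointer surfaces as the ValueError it set. That is what pytest.raises(ValueError) catches, and it is why the PyErr_Clear() calls are only defensive. Below is a minimal standalone sketch of that behavior; it is illustrative only, not part of this diff, and the capsule name passed is arbitrary.

import ctypes

# ctypes.pythonapi is a PyDLL: after each call the Python error indicator is
# checked, so a C-API function that fails and sets an exception raises it
# here instead of silently returning NULL.
get_ptr = ctypes.pythonapi.PyCapsule_GetPointer
get_ptr.restype = ctypes.c_void_p
get_ptr.argtypes = [ctypes.py_object, ctypes.c_char_p]

try:
    # A non-capsule argument makes PyCapsule_GetPointer set a ValueError,
    # which the PyDLL wrapper re-raises.
    get_ptr(object(), b"arrow_array_stream")
except ValueError as exc:
    print("PyCapsule_GetPointer failed as expected:", exc)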

src/dataframe.rs

Lines changed: 2 additions & 29 deletions
@@ -63,31 +63,6 @@ use crate::{
 static ARROW_STREAM_NAME: &CStr =
     unsafe { CStr::from_bytes_with_nul_unchecked(b"arrow_array_stream\0") };

-unsafe extern "C" fn drop_stream(capsule: *mut ffi::PyObject) {
-    if capsule.is_null() {
-        return;
-    }
-
-    // When PyArrow imports this capsule it steals the raw stream pointer and
-    // sets the capsule's internal pointer to NULL. In that case
-    // `PyCapsule_IsValid` returns 0 and this destructor must not drop the
-    // stream as ownership has been transferred to PyArrow. If the capsule was
-    // never imported, the pointer remains valid and we are responsible for
-    // freeing the stream here.
-    if ffi::PyCapsule_IsValid(capsule, ARROW_STREAM_NAME.as_ptr()) == 1 {
-        let stream_ptr = ffi::PyCapsule_GetPointer(capsule, ARROW_STREAM_NAME.as_ptr())
-            as *mut FFI_ArrowArrayStream;
-        if !stream_ptr.is_null() {
-            drop(Box::from_raw(stream_ptr));
-        }
-    }
-
-    // `PyCapsule_GetPointer` sets a Python error on failure. Clear it only
-    // after the stream has been released (or determined to be owned
-    // elsewhere).
-    ffi::PyErr_Clear();
-}
-
 // https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116
 // - we have not decided on the table_provider approach yet
 // this is an interim implementation

@@ -999,14 +974,12 @@ impl PyDataFrame {
             "ArrowArrayStream pointer should never be null",
         );
         // The returned capsule allows zero-copy hand-off to PyArrow. When
-        // PyArrow imports the capsule it assumes ownership of the stream and
-        // nulls out the capsule's internal pointer so `drop_stream` knows not to
-        // free it.
+        // PyArrow imports the capsule it assumes ownership of the stream.
         let capsule = unsafe {
             ffi::PyCapsule_New(
                 stream_ptr as *mut c_void,
                 ARROW_STREAM_NAME.as_ptr(),
-                Some(drop_stream),
+                None,
             )
         };
         if capsule.is_null() {
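
For consumers, the zero-copy hand-off described in the remaining comment is normally reached through PyArrow's PyCapsule-protocol entry points rather than the private _import_from_c_capsule helper used in the test. Below is a minimal sketch of that path, assuming the datafusion SessionContext / from_pydict API seen in the test and a PyArrow version that provides RecordBatchReader.from_stream (15.0 or later); it is an illustration, not part of this commit.

import pyarrow as pa
from datafusion import SessionContext

ctx = SessionContext()
df = ctx.from_pydict({"a": [1, 2, 3]})

# from_stream() invokes df.__arrow_c_stream__() and takes ownership of the
# exported ArrowArrayStream, so the capsule needs no Python-side cleanup.
reader = pa.RecordBatchReader.from_stream(df)
print(reader.read_all())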
