1717
1818import pandas as pd
1919from pandas .api .types import is_string_dtype
20- from pandas .core .interchange .buffer import PandasBuffer
20+ from pandas .core .interchange .buffer import (
21+ PandasBuffer ,
22+ PandasPyarrowBackedBuffer ,
23+ )
2124from pandas .core .interchange .dataframe_protocol import (
2225 Column ,
2326 ColumnBuffers ,
@@ -195,6 +198,17 @@ def describe_null(self):
195198 null_value = 1
196199 return column_null_dtype , null_value
197200 kind = self .dtype [0 ]
201+ if isinstance (self ._col .dtype , ArrowDtype ):
202+ column_null_dtype = ColumnNullType .USE_BITMASK
203+ null_value = 0
204+ if ~ self ._col .isna ().any ():
205+ try :
206+ null , value = _NULL_DESCRIPTION [kind ]
207+ except KeyError as err :
208+ raise NotImplementedError (f"Data type { kind } not yet supported" ) from err
209+
210+ return null , value
211+ return column_null_dtype , null_value
198212 try :
199213 null , value = _NULL_DESCRIPTION [kind ]
200214 except KeyError as err :
@@ -282,6 +296,16 @@ def _get_data_buffer(
282296 """
283297 Return the buffer containing the data and the buffer's associated dtype.
284298 """
299+ if isinstance (self ._col .dtype , ArrowDtype ):
300+ arr = self ._col .array
301+ buffer = PandasPyarrowBackedBuffer (arr ._pa_array .chunks [0 ].buffers ()[1 ])
302+ dtype = (
303+ DtypeKind .BOOL ,
304+ 8 ,
305+ ArrowCTypes .BOOL ,
306+ Endianness .NATIVE ,
307+ ) # note: currently only support native endianness
308+ return buffer , dtype
285309 if self .dtype [0 ] == DtypeKind .DATETIME :
286310 # self.dtype[2] is an ArrowCTypes.TIMESTAMP where the tz will make
287311 # it longer than 4 characters
@@ -305,8 +329,6 @@ def _get_data_buffer(
305329 arr = self ._col .array
306330 if isinstance (self ._col .dtype , BaseMaskedDtype ):
307331 np_arr = arr ._data # type: ignore[attr-defined]
308- elif isinstance (self ._col .dtype , ArrowDtype ):
309- raise NotImplementedError ("ArrowDtype not handled yet" )
310332 else :
311333 np_arr = arr ._ndarray # type: ignore[attr-defined]
312334 buffer = PandasBuffer (np_arr , allow_copy = self ._allow_copy )
@@ -351,6 +373,15 @@ def _get_validity_buffer(self) -> tuple[PandasBuffer, Any]:
351373 """
352374 null , invalid = self .describe_null
353375
376+ if isinstance (self ._col .dtype , ArrowDtype ):
377+ arr = self ._col .array
378+ buf = arr ._pa_array .chunks [0 ].buffers ()[0 ]
379+ dtype = (DtypeKind .BOOL , 1 , ArrowCTypes .BOOL , Endianness .NATIVE )
380+ if buf is None :
381+ return buf , dtype
382+ buffer = PandasPyarrowBackedBuffer (buf )
383+ return buffer , dtype
384+
354385 if isinstance (self ._col .dtype , BaseMaskedDtype ):
355386 mask = self ._col .array ._mask # type: ignore[attr-defined]
356387 buffer = PandasBuffer (mask )
0 commit comments