@@ -190,6 +190,8 @@ def describe_categorical(self):
190
190
191
191
@property
192
192
def describe_null (self ):
193
+ if self ._col .dtype in ("Int64" , "Int64[pyarrow]" ):
194
+ return ColumnNullType .USE_BYTEMASK , 1
193
195
kind = self .dtype [0 ]
194
196
try :
195
197
null , value = _NULL_DESCRIPTION [kind ]
@@ -298,7 +300,11 @@ def _get_data_buffer(
298
300
DtypeKind .FLOAT ,
299
301
DtypeKind .BOOL ,
300
302
):
301
- np_arr = self ._col .to_numpy ()
303
+ arr = self ._col .array
304
+ if hasattr (arr , "_data" ):
305
+ np_arr = arr ._data
306
+ else :
307
+ np_arr = arr ._ndarray
302
308
buffer = PandasBuffer (np_arr , allow_copy = self ._allow_copy )
303
309
dtype = self .dtype
304
310
elif self .dtype [0 ] == DtypeKind .CATEGORICAL :
@@ -341,6 +347,32 @@ def _get_validity_buffer(self) -> tuple[PandasBuffer, Any]:
341
347
"""
342
348
null , invalid = self .describe_null
343
349
350
+ if self ._col .dtype == "Int64[pyarrow]" :
351
+ buf , mask = self ._col .array ._pa_array .chunk (0 ).buffers ()
352
+
353
+ # Convert the mask array to a Pandas "buffer" using
354
+ # a NumPy array as the backing store
355
+ buffer = PandasBuffer (mask , size = 8 )
356
+
357
+ # Define the dtype of the returned buffer
358
+ dtype = (DtypeKind .BOOL , 8 , ArrowCTypes .BOOL , Endianness .NATIVE )
359
+
360
+ return buffer , dtype
361
+
362
+ if self ._col .dtype == "Int64" :
363
+ buf = self ._col .array ._data
364
+
365
+ mask = self ._col .array ._mask
366
+
367
+ # Convert the mask array to a Pandas "buffer" using
368
+ # a NumPy array as the backing store
369
+ buffer = PandasBuffer (mask )
370
+
371
+ # Define the dtype of the returned buffer
372
+ dtype = (DtypeKind .BOOL , 8 , ArrowCTypes .BOOL , Endianness .NATIVE )
373
+
374
+ return buffer , dtype
375
+
344
376
if self .dtype [0 ] == DtypeKind .STRING :
345
377
# For now, use byte array as the mask.
346
378
# TODO: maybe store as bit array to save space?..
0 commit comments