Skip to content

Commit b215522

Browse files
meeseeksmachine, lukemanley, phofl
authored
Backport PR #54515 on branch 2.1.x (ENH: ArrowExtensionArray(duration) workarounds for pyarrow versions >= 11.0) (#54524)
Backport PR #54515: ENH: ArrowExtensionArray(duration) workarounds for pyarrow versions >= 11.0

Co-authored-by: Luke Manley <[email protected]>
Co-authored-by: Patrick Hoefler <[email protected]>
1 parent b3f122c commit b215522

File tree

1 file changed

+9
-10
lines changed

1 file changed

+9
-10
lines changed

pandas/core/arrays/arrow/array.py

+9 -10
Original file line number | Diff line number | Diff line change
@@ -1011,12 +1011,11 @@ def factorize(
10111011
) -> tuple[np.ndarray, ExtensionArray]:
10121012
null_encoding = "mask" if use_na_sentinel else "encode"
10131013

1014-
pa_type = self._pa_array.type
1015-
if pa.types.is_duration(pa_type):
1014+
data = self._pa_array
1015+
pa_type = data.type
1016+
if pa_version_under11p0 and pa.types.is_duration(pa_type):
10161017
# https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
1017-
data = self._pa_array.cast(pa.int64())
1018-
else:
1019-
data = self._pa_array
1018+
data = data.cast(pa.int64())
10201019

10211020
if pa.types.is_dictionary(data.type):
10221021
encoded = data
@@ -1034,7 +1033,7 @@ def factorize(
10341033
)
10351034
uniques = type(self)(encoded.chunk(0).dictionary)
10361035

1037-
if pa.types.is_duration(pa_type):
1036+
if pa_version_under11p0 and pa.types.is_duration(pa_type):
10381037
uniques = cast(ArrowExtensionArray, uniques.astype(self.dtype))
10391038
return indices, uniques
10401039

@@ -1273,15 +1272,15 @@ def unique(self) -> Self:
12731272
"""
12741273
pa_type = self._pa_array.type
12751274

1276-
if pa.types.is_duration(pa_type):
1275+
if pa_version_under11p0 and pa.types.is_duration(pa_type):
12771276
# https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
12781277
data = self._pa_array.cast(pa.int64())
12791278
else:
12801279
data = self._pa_array
12811280

12821281
pa_result = pc.unique(data)
12831282

1284-
if pa.types.is_duration(pa_type):
1283+
if pa_version_under11p0 and pa.types.is_duration(pa_type):
12851284
pa_result = pa_result.cast(pa_type)
12861285

12871286
return type(self)(pa_result)
@@ -1304,7 +1303,7 @@ def value_counts(self, dropna: bool = True) -> Series:
13041303
Series.value_counts
13051304
"""
13061305
pa_type = self._pa_array.type
1307-
if pa.types.is_duration(pa_type):
1306+
if pa_version_under11p0 and pa.types.is_duration(pa_type):
13081307
# https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
13091308
data = self._pa_array.cast(pa.int64())
13101309
else:
@@ -1324,7 +1323,7 @@ def value_counts(self, dropna: bool = True) -> Series:
13241323
values = values.filter(mask)
13251324
counts = counts.filter(mask)
13261325

1327-
if pa.types.is_duration(pa_type):
1326+
if pa_version_under11p0 and pa.types.is_duration(pa_type):
13281327
values = values.cast(pa_type)
13291328

13301329
counts = ArrowExtensionArray(counts)

0 commit comments

Comments
 (0)