From a78836b750864497a701955f92ca2b87bc61946a Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 12 Aug 2023 13:10:20 -0400 Subject: [PATCH 1/4] use version flag --- pandas/core/arrays/arrow/array.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 88695f11fba59..58d5f83637110 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1011,12 +1011,10 @@ def factorize( ) -> tuple[np.ndarray, ExtensionArray]: null_encoding = "mask" if use_na_sentinel else "encode" - pa_type = self._pa_array.type - if pa.types.is_duration(pa_type): + data = self._pa_array + if pa_version_under11p0 and pa.types.is_duration(data.type): # https://github.com/apache/arrow/issues/15226#issuecomment-1376578323 - data = self._pa_array.cast(pa.int64()) - else: - data = self._pa_array + data = data.cast(pa.int64()) if pa.types.is_dictionary(data.type): encoded = data @@ -1034,7 +1032,7 @@ def factorize( ) uniques = type(self)(encoded.chunk(0).dictionary) - if pa.types.is_duration(pa_type): + if pa_version_under11p0 and pa.types.is_duration(data.type): uniques = cast(ArrowExtensionArray, uniques.astype(self.dtype)) return indices, uniques From 2216a1faae67b7b9e531cc34f86bb58f7a4c3dec Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 12 Aug 2023 13:17:15 -0400 Subject: [PATCH 2/4] more cases --- pandas/core/arrays/arrow/array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 58d5f83637110..880fd72b861e6 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1271,7 +1271,7 @@ def unique(self) -> Self: """ pa_type = self._pa_array.type - if pa.types.is_duration(pa_type): + if pa_version_under11p0 and pa.types.is_duration(pa_type): # https://github.com/apache/arrow/issues/15226#issuecomment-1376578323 data = self._pa_array.cast(pa.int64()) else: @@ -1302,7 +1302,7 @@ def value_counts(self, dropna: bool = True) -> Series: Series.value_counts """ pa_type = self._pa_array.type - if pa.types.is_duration(pa_type): + if pa_version_under11p0 and pa.types.is_duration(pa_type): # https://github.com/apache/arrow/issues/15226#issuecomment-1376578323 data = self._pa_array.cast(pa.int64()) else: From 3b644d0a74a1571c4e86e262133a335f4542eb5d Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 12 Aug 2023 14:35:26 -0400 Subject: [PATCH 3/4] updates --- pandas/core/arrays/arrow/array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 880fd72b861e6..149533e25d1d7 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1279,7 +1279,7 @@ def unique(self) -> Self: pa_result = pc.unique(data) - if pa.types.is_duration(pa_type): + if pa_version_under11p0 and pa.types.is_duration(pa_type): pa_result = pa_result.cast(pa_type) return type(self)(pa_result) @@ -1322,7 +1322,7 @@ def value_counts(self, dropna: bool = True) -> Series: values = values.filter(mask) counts = counts.filter(mask) - if pa.types.is_duration(pa_type): + if pa_version_under11p0 and pa.types.is_duration(pa_type): values = values.cast(pa_type) counts = ArrowExtensionArray(counts) From 93304e292212db6168941e1a963448634f6fc713 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sat, 12 Aug 2023 17:13:29 -0400 Subject: [PATCH 4/4] fix --- pandas/core/arrays/arrow/array.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 149533e25d1d7..07436cdf24e8d 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1012,7 +1012,8 @@ def factorize( null_encoding = "mask" if use_na_sentinel else "encode" data = self._pa_array - if pa_version_under11p0 and pa.types.is_duration(data.type): + pa_type = data.type + if pa_version_under11p0 and pa.types.is_duration(pa_type): # https://github.com/apache/arrow/issues/15226#issuecomment-1376578323 data = data.cast(pa.int64()) @@ -1032,7 +1033,7 @@ def factorize( ) uniques = type(self)(encoded.chunk(0).dictionary) - if pa_version_under11p0 and pa.types.is_duration(data.type): + if pa_version_under11p0 and pa.types.is_duration(pa_type): uniques = cast(ArrowExtensionArray, uniques.astype(self.dtype)) return indices, uniques