Skip to content

Commit 388eca7

Browse files
jorisvandenbossche authored and JulianWgs committed
[ArrowStringDtype] Make it already a StringDtype subclass (pandas-dev#41312)
1 parent d3e613b commit 388eca7

File tree

5 files changed

+12
-16
lines changed

5 files changed

+12
-16
lines changed

pandas/core/arrays/base.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,6 @@ def astype(self, dtype, copy=True):
530530
NumPy ndarray with 'dtype' for its dtype.
531531
"""
532532
from pandas.core.arrays.string_ import StringDtype
533-
from pandas.core.arrays.string_arrow import ArrowStringDtype
534533

535534
dtype = pandas_dtype(dtype)
536535
if is_dtype_equal(dtype, self.dtype):
@@ -540,9 +539,8 @@ def astype(self, dtype, copy=True):
540539
return self.copy()
541540

542541
# FIXME: Really hard-code here?
543-
if isinstance(
544-
dtype, (ArrowStringDtype, StringDtype)
545-
): # allow conversion to StringArrays
542+
if isinstance(dtype, StringDtype):
543+
# allow conversion to StringArrays
546544
return dtype.construct_array_type()._from_sequence(self, copy=False)
547545

548546
return np.array(self, dtype=dtype, copy=copy)

pandas/core/arrays/interval.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,6 @@ def astype(self, dtype, copy: bool = True):
829829
"""
830830
from pandas import Index
831831
from pandas.core.arrays.string_ import StringDtype
832-
from pandas.core.arrays.string_arrow import ArrowStringDtype
833832

834833
if dtype is not None:
835834
dtype = pandas_dtype(dtype)
@@ -852,7 +851,7 @@ def astype(self, dtype, copy: bool = True):
852851
return self._shallow_copy(new_left, new_right)
853852
elif is_categorical_dtype(dtype):
854853
return Categorical(np.asarray(self), dtype=dtype)
855-
elif isinstance(dtype, (StringDtype, ArrowStringDtype)):
854+
elif isinstance(dtype, StringDtype):
856855
return dtype.construct_array_type()._from_sequence(self, copy=False)
857856

858857
# TODO: This try/except will be repeated.

pandas/core/arrays/string_arrow.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
from pandas.util._decorators import doc
2525
from pandas.util._validators import validate_fillna_kwargs
2626

27-
from pandas.core.dtypes.base import ExtensionDtype
2827
from pandas.core.dtypes.common import (
2928
is_array_like,
3029
is_bool_dtype,
@@ -42,6 +41,7 @@
4241
from pandas.core.arrays.base import ExtensionArray
4342
from pandas.core.arrays.boolean import BooleanDtype
4443
from pandas.core.arrays.integer import Int64Dtype
44+
from pandas.core.arrays.string_ import StringDtype
4545
from pandas.core.indexers import (
4646
check_array_indexer,
4747
validate_indices,
@@ -74,7 +74,7 @@
7474

7575

7676
@register_extension_dtype
77-
class ArrowStringDtype(ExtensionDtype):
77+
class ArrowStringDtype(StringDtype):
7878
"""
7979
Extension dtype for string data in a ``pyarrow.ChunkedArray``.
8080
@@ -110,7 +110,7 @@ def type(self) -> type[str]:
110110
return str
111111

112112
@classmethod
113-
def construct_array_type(cls) -> type_t[ArrowStringArray]:
113+
def construct_array_type(cls) -> type_t[ArrowStringArray]: # type: ignore[override]
114114
"""
115115
Return the array type associated with this dtype.
116116
@@ -126,7 +126,9 @@ def __hash__(self) -> int:
126126
def __repr__(self) -> str:
127127
return "ArrowStringDtype"
128128

129-
def __from_arrow__(self, array: pa.Array | pa.ChunkedArray) -> ArrowStringArray:
129+
def __from_arrow__( # type: ignore[override]
130+
self, array: pa.Array | pa.ChunkedArray
131+
) -> ArrowStringArray:
130132
"""
131133
Construct StringArray from pyarrow Array/ChunkedArray.
132134
"""

pandas/core/strings/accessor.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -155,11 +155,10 @@ class StringMethods(NoNewAttributesMixin):
155155

156156
def __init__(self, data):
157157
from pandas.core.arrays.string_ import StringDtype
158-
from pandas.core.arrays.string_arrow import ArrowStringDtype
159158

160159
self._inferred_dtype = self._validate(data)
161160
self._is_categorical = is_categorical_dtype(data.dtype)
162-
self._is_string = isinstance(data.dtype, (StringDtype, ArrowStringDtype))
161+
self._is_string = isinstance(data.dtype, StringDtype)
163162
self._data = data
164163

165164
self._index = self._name = None
@@ -3028,9 +3027,8 @@ def _result_dtype(arr):
30283027
# ideally we just pass `dtype=arr.dtype` unconditionally, but this fails
30293028
# when the list of values is empty.
30303029
from pandas.core.arrays.string_ import StringDtype
3031-
from pandas.core.arrays.string_arrow import ArrowStringDtype
30323030

3033-
if isinstance(arr.dtype, (StringDtype, ArrowStringDtype)):
3031+
if isinstance(arr.dtype, StringDtype):
30343032
return arr.dtype.name
30353033
else:
30363034
return object

pandas/tests/extension/json/array.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
ExtensionDtype,
4141
)
4242
from pandas.api.types import is_bool_dtype
43-
from pandas.core.arrays.string_arrow import ArrowStringDtype
4443

4544

4645
class JSONDtype(ExtensionDtype):
@@ -196,7 +195,7 @@ def astype(self, dtype, copy=True):
196195
if copy:
197196
return self.copy()
198197
return self
199-
elif isinstance(dtype, (StringDtype, ArrowStringDtype)):
198+
elif isinstance(dtype, StringDtype):
200199
value = self.astype(str) # numpy doesn't like nested dicts
201200
return dtype.construct_array_type()._from_sequence(value, copy=False)
202201

0 commit comments

Comments
 (0)