Skip to content

Commit 6d999c0

Browse files
TYP: make dtype required in _from_sequence_of_strings (#56519)
* TYP: make dtype required in _from_sequence_of_strings
* GH ref
* mypy fixup
* Move whatsnew
* Update pandas/core/arrays/base.py

Co-authored-by: Matthew Roeschke <[email protected]>

---------

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 1ed627a commit 6d999c0

File tree

11 files changed

+34
-21
lines changed

11 files changed

+34
-21
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ Other API changes
8888
^^^^^^^^^^^^^^^^^
8989
- 3rd party ``py.path`` objects are no longer explicitly supported in IO methods. Use :py:class:`pathlib.Path` objects instead (:issue:`57091`)
9090
- :attr:`MultiIndex.codes`, :attr:`MultiIndex.levels`, and :attr:`MultiIndex.names` now return a ``tuple`` instead of a ``FrozenList`` (:issue:`53531`)
91+
- Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`)
9192
- pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`)
9293
-
9394

pandas/core/arrays/arrow/array.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,8 @@ def floordiv_compat(
199199
npt,
200200
)
201201

202+
from pandas.core.dtypes.dtypes import ExtensionDtype
203+
202204
from pandas import Series
203205
from pandas.core.arrays.datetimes import DatetimeArray
204206
from pandas.core.arrays.timedeltas import TimedeltaArray
@@ -316,7 +318,7 @@ def _from_sequence(
316318

317319
@classmethod
318320
def _from_sequence_of_strings(
319-
cls, strings, *, dtype: Dtype | None = None, copy: bool = False
321+
cls, strings, *, dtype: ExtensionDtype, copy: bool = False
320322
) -> Self:
321323
"""
322324
Construct a new ExtensionArray from a sequence of strings.
@@ -533,8 +535,9 @@ def _box_pa_array(
533535
):
534536
# TODO: Move logic in _from_sequence_of_strings into
535537
# _box_pa_array
538+
dtype = ArrowDtype(pa_type)
536539
return cls._from_sequence_of_strings(
537-
value, dtype=pa_type
540+
value, dtype=dtype
538541
)._pa_array
539542
else:
540543
raise

pandas/core/arrays/base.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
332332

333333
@classmethod
334334
def _from_sequence_of_strings(
335-
cls, strings, *, dtype: Dtype | None = None, copy: bool = False
335+
cls, strings, *, dtype: ExtensionDtype, copy: bool = False
336336
) -> Self:
337337
"""
338338
Construct a new ExtensionArray from a sequence of strings.
@@ -342,7 +342,7 @@ def _from_sequence_of_strings(
342342
strings : Sequence
343343
Each element will be an instance of the scalar type for this
344344
array, ``cls.dtype.type``.
345-
dtype : dtype, optional
345+
dtype : ExtensionDtype
346346
Construct for this particular dtype. This should be a Dtype
347347
compatible with the ExtensionArray.
348348
copy : bool, default False
@@ -354,7 +354,9 @@ def _from_sequence_of_strings(
354354
355355
Examples
356356
--------
357-
>>> pd.arrays.IntegerArray._from_sequence_of_strings(["1", "2", "3"])
357+
>>> pd.arrays.IntegerArray._from_sequence_of_strings(
358+
... ["1", "2", "3"], dtype=pd.Int64Dtype()
359+
... )
358360
<IntegerArray>
359361
[1, 2, 3]
360362
Length: 3, dtype: Int64

pandas/core/arrays/boolean.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,14 @@
2929
import pyarrow
3030

3131
from pandas._typing import (
32-
Dtype,
3332
DtypeObj,
3433
Self,
3534
npt,
3635
type_t,
3736
)
3837

38+
from pandas.core.dtypes.dtypes import ExtensionDtype
39+
3940

4041
@register_extension_dtype
4142
class BooleanDtype(BaseMaskedDtype):
@@ -324,7 +325,7 @@ def _from_sequence_of_strings(
324325
cls,
325326
strings: list[str],
326327
*,
327-
dtype: Dtype | None = None,
328+
dtype: ExtensionDtype,
328329
copy: bool = False,
329330
true_values: list[str] | None = None,
330331
false_values: list[str] | None = None,

pandas/core/arrays/numeric.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,13 @@
3333
import pyarrow
3434

3535
from pandas._typing import (
36-
Dtype,
3736
DtypeObj,
3837
Self,
3938
npt,
4039
)
4140

41+
from pandas.core.dtypes.dtypes import ExtensionDtype
42+
4243

4344
class NumericDtype(BaseMaskedDtype):
4445
_default_np_dtype: np.dtype
@@ -270,7 +271,7 @@ def _coerce_to_array(
270271

271272
@classmethod
272273
def _from_sequence_of_strings(
273-
cls, strings, *, dtype: Dtype | None = None, copy: bool = False
274+
cls, strings, *, dtype: ExtensionDtype, copy: bool = False
274275
) -> Self:
275276
from pandas.core.tools.numeric import to_numeric
276277

pandas/core/arrays/period.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@
9090
npt,
9191
)
9292

93+
from pandas.core.dtypes.dtypes import ExtensionDtype
94+
9395
from pandas.core.arrays import (
9496
DatetimeArray,
9597
TimedeltaArray,
@@ -303,7 +305,7 @@ def _from_sequence(
303305

304306
@classmethod
305307
def _from_sequence_of_strings(
306-
cls, strings, *, dtype: Dtype | None = None, copy: bool = False
308+
cls, strings, *, dtype: ExtensionDtype, copy: bool = False
307309
) -> Self:
308310
return cls._from_sequence(strings, dtype=dtype, copy=copy)
309311

pandas/core/arrays/string_.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ def _from_sequence(
416416

417417
@classmethod
418418
def _from_sequence_of_strings(
419-
cls, strings, *, dtype: Dtype | None = None, copy: bool = False
419+
cls, strings, *, dtype: ExtensionDtype, copy: bool = False
420420
) -> Self:
421421
return cls._from_sequence(strings, dtype=dtype, copy=copy)
422422

pandas/core/arrays/string_arrow.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@
6262
npt,
6363
)
6464

65+
from pandas.core.dtypes.dtypes import ExtensionDtype
66+
6567
from pandas import Series
6668

6769

@@ -202,7 +204,7 @@ def _from_sequence(
202204

203205
@classmethod
204206
def _from_sequence_of_strings(
205-
cls, strings, dtype: Dtype | None = None, copy: bool = False
207+
cls, strings, *, dtype: ExtensionDtype, copy: bool = False
206208
) -> Self:
207209
return cls._from_sequence(strings, dtype=dtype, copy=copy)
208210

pandas/tests/arrays/boolean/test_construction.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def test_coerce_to_numpy_array():
243243
def test_to_boolean_array_from_strings():
244244
result = BooleanArray._from_sequence_of_strings(
245245
np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object),
246-
dtype="boolean",
246+
dtype=pd.BooleanDtype(),
247247
)
248248
expected = BooleanArray(
249249
np.array([True, False, True, True, False, False, False]),
@@ -255,7 +255,7 @@ def test_to_boolean_array_from_strings():
255255

256256
def test_to_boolean_array_from_strings_invalid_string():
257257
with pytest.raises(ValueError, match="cannot be cast"):
258-
BooleanArray._from_sequence_of_strings(["donkey"], dtype="boolean")
258+
BooleanArray._from_sequence_of_strings(["donkey"], dtype=pd.BooleanDtype())
259259

260260

261261
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])

pandas/tests/extension/decimal/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def _from_sequence(cls, scalars, *, dtype=None, copy=False):
101101
return cls(scalars)
102102

103103
@classmethod
104-
def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
104+
def _from_sequence_of_strings(cls, strings, *, dtype: ExtensionDtype, copy=False):
105105
return cls._from_sequence(
106106
[decimal.Decimal(x) for x in strings], dtype=dtype, copy=copy
107107
)

pandas/tests/extension/test_arrow.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -354,10 +354,9 @@ def test_from_sequence_pa_array(self, data):
354354
assert isinstance(result._pa_array, pa.ChunkedArray)
355355

356356
def test_from_sequence_pa_array_notimplemented(self, request):
357+
dtype = ArrowDtype(pa.month_day_nano_interval())
357358
with pytest.raises(NotImplementedError, match="Converting strings to"):
358-
ArrowExtensionArray._from_sequence_of_strings(
359-
["12-1"], dtype=pa.month_day_nano_interval()
360-
)
359+
ArrowExtensionArray._from_sequence_of_strings(["12-1"], dtype=dtype)
361360

362361
def test_from_sequence_of_strings_pa_array(self, data, request):
363362
pa_dtype = data.dtype.pyarrow_dtype
@@ -2409,7 +2408,8 @@ def test_duration_from_strings_with_nat(unit):
24092408
# GH51175
24102409
strings = ["1000", "NaT"]
24112410
pa_type = pa.duration(unit)
2412-
result = ArrowExtensionArray._from_sequence_of_strings(strings, dtype=pa_type)
2411+
dtype = ArrowDtype(pa_type)
2412+
result = ArrowExtensionArray._from_sequence_of_strings(strings, dtype=dtype)
24132413
expected = ArrowExtensionArray(pa.array([1000, None], type=pa_type))
24142414
tm.assert_extension_array_equal(result, expected)
24152415

@@ -2928,13 +2928,14 @@ def test_from_sequence_of_strings_boolean():
29282928
[True] * len(true_strings) + [False] * len(false_strings) + [None] * len(nulls)
29292929
)
29302930

2931-
result = ArrowExtensionArray._from_sequence_of_strings(strings, dtype=pa.bool_())
2931+
dtype = ArrowDtype(pa.bool_())
2932+
result = ArrowExtensionArray._from_sequence_of_strings(strings, dtype=dtype)
29322933
expected = pd.array(bools, dtype="boolean[pyarrow]")
29332934
tm.assert_extension_array_equal(result, expected)
29342935

29352936
strings = ["True", "foo"]
29362937
with pytest.raises(pa.ArrowInvalid, match="Failed to parse"):
2937-
ArrowExtensionArray._from_sequence_of_strings(strings, dtype=pa.bool_())
2938+
ArrowExtensionArray._from_sequence_of_strings(strings, dtype=dtype)
29382939

29392940

29402941
def test_concat_empty_arrow_backed_series(dtype):

0 commit comments

Comments
 (0)