Skip to content

Commit ee4ceec

Browse files
authored
CLN: pass dtype to from_sequence explicitly (pandas-dev#56506)

* CLN: pass dtype to from_sequence
* mypy fixup
1 parent 32aeafd commit ee4ceec

File tree

14 files changed: +72 -54 lines changed

14 files changed: +72 -54 lines changed

pandas/core/arrays/interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1071,7 +1071,7 @@ def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
10711071
fill_value = Index(self._left, copy=False)._na_value
10721072
empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1))
10731073
else:
1074-
empty = self._from_sequence([fill_value] * empty_len)
1074+
empty = self._from_sequence([fill_value] * empty_len, dtype=self.dtype)
10751075

10761076
if periods > 0:
10771077
a = empty

pandas/core/arrays/string_arrow.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def __len__(self) -> int:
150150
return len(self._pa_array)
151151

152152
@classmethod
153-
def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False):
153+
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
154154
from pandas.core.arrays.masked import BaseMaskedArray
155155

156156
_chk_pyarrow_available()

pandas/core/reshape/merge.py

+15-14
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@
5252
ensure_object,
5353
is_bool,
5454
is_bool_dtype,
55-
is_extension_array_dtype,
5655
is_float_dtype,
5756
is_integer,
5857
is_integer_dtype,
@@ -1385,20 +1384,22 @@ def _maybe_coerce_merge_keys(self) -> None:
13851384
if lk.dtype.kind == rk.dtype.kind:
13861385
continue
13871386

1388-
if is_extension_array_dtype(lk.dtype) and not is_extension_array_dtype(
1389-
rk.dtype
1387+
if isinstance(lk.dtype, ExtensionDtype) and not isinstance(
1388+
rk.dtype, ExtensionDtype
13901389
):
13911390
ct = find_common_type([lk.dtype, rk.dtype])
1392-
if is_extension_array_dtype(ct):
1393-
rk = ct.construct_array_type()._from_sequence(rk) # type: ignore[union-attr]
1391+
if isinstance(ct, ExtensionDtype):
1392+
com_cls = ct.construct_array_type()
1393+
rk = com_cls._from_sequence(rk, dtype=ct, copy=False)
13941394
else:
1395-
rk = rk.astype(ct) # type: ignore[arg-type]
1396-
elif is_extension_array_dtype(rk.dtype):
1395+
rk = rk.astype(ct)
1396+
elif isinstance(rk.dtype, ExtensionDtype):
13971397
ct = find_common_type([lk.dtype, rk.dtype])
1398-
if is_extension_array_dtype(ct):
1399-
lk = ct.construct_array_type()._from_sequence(lk) # type: ignore[union-attr]
1398+
if isinstance(ct, ExtensionDtype):
1399+
com_cls = ct.construct_array_type()
1400+
lk = com_cls._from_sequence(lk, dtype=ct, copy=False)
14001401
else:
1401-
lk = lk.astype(ct) # type: ignore[arg-type]
1402+
lk = lk.astype(ct)
14021403

14031404
# check whether ints and floats
14041405
if is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype):
@@ -2508,15 +2509,15 @@ def _convert_arrays_and_get_rizer_klass(
25082509
if not isinstance(lk, ExtensionArray):
25092510
lk = cls._from_sequence(lk, dtype=dtype, copy=False)
25102511
else:
2511-
lk = lk.astype(dtype)
2512+
lk = lk.astype(dtype, copy=False)
25122513

25132514
if not isinstance(rk, ExtensionArray):
25142515
rk = cls._from_sequence(rk, dtype=dtype, copy=False)
25152516
else:
2516-
rk = rk.astype(dtype)
2517+
rk = rk.astype(dtype, copy=False)
25172518
else:
2518-
lk = lk.astype(dtype)
2519-
rk = rk.astype(dtype)
2519+
lk = lk.astype(dtype, copy=False)
2520+
rk = rk.astype(dtype, copy=False)
25202521
if isinstance(lk, BaseMaskedArray):
25212522
# Invalid index type "type" for "Dict[Type[object], Type[Factorizer]]";
25222523
# expected type "Type[object]"

pandas/tests/arrays/boolean/test_construction.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,8 @@ def test_coerce_to_numpy_array():
242242

243243
def test_to_boolean_array_from_strings():
244244
result = BooleanArray._from_sequence_of_strings(
245-
np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object)
245+
np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object),
246+
dtype="boolean",
246247
)
247248
expected = BooleanArray(
248249
np.array([True, False, True, True, False, False, False]),
@@ -254,7 +255,7 @@ def test_to_boolean_array_from_strings():
254255

255256
def test_to_boolean_array_from_strings_invalid_string():
256257
with pytest.raises(ValueError, match="cannot be cast"):
257-
BooleanArray._from_sequence_of_strings(["donkey"])
258+
BooleanArray._from_sequence_of_strings(["donkey"], dtype="boolean")
258259

259260

260261
@pytest.mark.parametrize("box", [True, False], ids=["series", "array"])

pandas/tests/arrays/categorical/test_constructors.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -755,12 +755,12 @@ def test_categorical_extension_array_nullable(self, nulls_fixture):
755755

756756
def test_from_sequence_copy(self):
757757
cat = Categorical(np.arange(5).repeat(2))
758-
result = Categorical._from_sequence(cat, dtype=None, copy=False)
758+
result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=False)
759759

760760
# more generally, we'd be OK with a view
761761
assert result._codes is cat._codes
762762

763-
result = Categorical._from_sequence(cat, dtype=None, copy=True)
763+
result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=True)
764764

765765
assert not tm.shares_memory(result, cat)
766766

pandas/tests/arrays/datetimes/test_cumulative.py

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def test_accumulators_freq(self):
2626
"2000-01-02",
2727
"2000-01-03",
2828
],
29+
dtype="M8[ns]",
2930
)
3031
tm.assert_datetime_array_equal(result, expected)
3132

pandas/tests/arrays/integer/test_construction.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -175,32 +175,34 @@ def test_to_integer_array_dtype_keyword(constructor):
175175

176176

177177
def test_to_integer_array_float():
178-
result = IntegerArray._from_sequence([1.0, 2.0])
178+
result = IntegerArray._from_sequence([1.0, 2.0], dtype="Int64")
179179
expected = pd.array([1, 2], dtype="Int64")
180180
tm.assert_extension_array_equal(result, expected)
181181

182182
with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
183-
IntegerArray._from_sequence([1.5, 2.0])
183+
IntegerArray._from_sequence([1.5, 2.0], dtype="Int64")
184184

185185
# for float dtypes, the itemsize is not preserved
186-
result = IntegerArray._from_sequence(np.array([1.0, 2.0], dtype="float32"))
186+
result = IntegerArray._from_sequence(
187+
np.array([1.0, 2.0], dtype="float32"), dtype="Int64"
188+
)
187189
assert result.dtype == Int64Dtype()
188190

189191

190192
def test_to_integer_array_str():
191-
result = IntegerArray._from_sequence(["1", "2", None])
193+
result = IntegerArray._from_sequence(["1", "2", None], dtype="Int64")
192194
expected = pd.array([1, 2, np.nan], dtype="Int64")
193195
tm.assert_extension_array_equal(result, expected)
194196

195197
with pytest.raises(
196198
ValueError, match=r"invalid literal for int\(\) with base 10: .*"
197199
):
198-
IntegerArray._from_sequence(["1", "2", ""])
200+
IntegerArray._from_sequence(["1", "2", ""], dtype="Int64")
199201

200202
with pytest.raises(
201203
ValueError, match=r"invalid literal for int\(\) with base 10: .*"
202204
):
203-
IntegerArray._from_sequence(["1.5", "2.0"])
205+
IntegerArray._from_sequence(["1.5", "2.0"], dtype="Int64")
204206

205207

206208
@pytest.mark.parametrize(

pandas/tests/arrays/test_array.py

+32-21
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,11 @@ def test_dt64_array(dtype_unit):
6060
None,
6161
NumpyExtensionArray(np.array([], dtype=object)),
6262
),
63-
(np.array([1, 2], dtype="int64"), None, IntegerArray._from_sequence([1, 2])),
63+
(
64+
np.array([1, 2], dtype="int64"),
65+
None,
66+
IntegerArray._from_sequence([1, 2], dtype="Int64"),
67+
),
6468
(
6569
np.array([1.0, 2.0], dtype="float64"),
6670
None,
@@ -284,7 +288,7 @@ def test_array_copy():
284288
# datetime
285289
(
286290
[pd.Timestamp("2000"), pd.Timestamp("2001")],
287-
DatetimeArray._from_sequence(["2000", "2001"]),
291+
DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
288292
),
289293
(
290294
[datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
@@ -319,7 +323,7 @@ def test_array_copy():
319323
# timedelta
320324
(
321325
[pd.Timedelta("1h"), pd.Timedelta("2h")],
322-
TimedeltaArray._from_sequence(["1h", "2h"]),
326+
TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
323327
),
324328
(
325329
np.array([1, 2], dtype="m8[ns]"),
@@ -330,35 +334,42 @@ def test_array_copy():
330334
TimedeltaArray(np.array([1, 2], dtype="m8[us]")),
331335
),
332336
# integer
333-
([1, 2], IntegerArray._from_sequence([1, 2])),
334-
([1, None], IntegerArray._from_sequence([1, None])),
335-
([1, pd.NA], IntegerArray._from_sequence([1, pd.NA])),
336-
([1, np.nan], IntegerArray._from_sequence([1, np.nan])),
337+
([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")),
338+
([1, None], IntegerArray._from_sequence([1, None], dtype="Int64")),
339+
([1, pd.NA], IntegerArray._from_sequence([1, pd.NA], dtype="Int64")),
340+
([1, np.nan], IntegerArray._from_sequence([1, np.nan], dtype="Int64")),
337341
# float
338-
([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2])),
339-
([0.1, None], FloatingArray._from_sequence([0.1, pd.NA])),
340-
([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA])),
341-
([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA])),
342+
([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2], dtype="Float64")),
343+
([0.1, None], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
344+
([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
345+
([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
342346
# integer-like float
343-
([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0])),
344-
([1.0, None], FloatingArray._from_sequence([1.0, pd.NA])),
345-
([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA])),
346-
([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA])),
347+
([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
348+
([1.0, None], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
349+
([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
350+
([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
347351
# mixed-integer-float
348-
([1, 2.0], FloatingArray._from_sequence([1.0, 2.0])),
349-
([1, np.nan, 2.0], FloatingArray._from_sequence([1.0, None, 2.0])),
352+
([1, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
353+
(
354+
[1, np.nan, 2.0],
355+
FloatingArray._from_sequence([1.0, None, 2.0], dtype="Float64"),
356+
),
350357
# string
351358
(
352359
["a", "b"],
353-
pd.StringDtype().construct_array_type()._from_sequence(["a", "b"]),
360+
pd.StringDtype()
361+
.construct_array_type()
362+
._from_sequence(["a", "b"], dtype=pd.StringDtype()),
354363
),
355364
(
356365
["a", None],
357-
pd.StringDtype().construct_array_type()._from_sequence(["a", None]),
366+
pd.StringDtype()
367+
.construct_array_type()
368+
._from_sequence(["a", None], dtype=pd.StringDtype()),
358369
),
359370
# Boolean
360-
([True, False], BooleanArray._from_sequence([True, False])),
361-
([True, None], BooleanArray._from_sequence([True, None])),
371+
([True, False], BooleanArray._from_sequence([True, False], dtype="boolean")),
372+
([True, None], BooleanArray._from_sequence([True, None], dtype="boolean")),
362373
],
363374
)
364375
def test_array_inference(data, expected):

pandas/tests/extension/base/constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def test_from_sequence_from_cls(self, data):
1818

1919
def test_array_from_scalars(self, data):
2020
scalars = [data[0], data[1], data[2]]
21-
result = data._from_sequence(scalars)
21+
result = data._from_sequence(scalars, dtype=data.dtype)
2222
assert isinstance(result, type(data))
2323

2424
def test_series_constructor(self, data):

pandas/tests/extension/base/methods.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ def test_duplicated(self, data, keep):
263263
@pytest.mark.parametrize("box", [pd.Series, lambda x: x])
264264
@pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])
265265
def test_unique(self, data, box, method):
266-
duplicated = box(data._from_sequence([data[0], data[0]]))
266+
duplicated = box(data._from_sequence([data[0], data[0]], dtype=data.dtype))
267267

268268
result = method(duplicated)
269269

pandas/tests/extension/list/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def __init__(self, values, dtype=None, copy=False) -> None:
5454
self.data = values
5555

5656
@classmethod
57-
def _from_sequence(cls, scalars, dtype=None, copy=False):
57+
def _from_sequence(cls, scalars, *, dtype=None, copy=False):
5858
data = np.empty(len(scalars), dtype=object)
5959
data[:] = scalars
6060
return cls(data)

pandas/tests/extension/test_arrow.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -335,11 +335,13 @@ def test_from_dtype(self, data, request):
335335
def test_from_sequence_pa_array(self, data):
336336
# https://github.com/pandas-dev/pandas/pull/47034#discussion_r955500784
337337
# data._pa_array = pa.ChunkedArray
338-
result = type(data)._from_sequence(data._pa_array)
338+
result = type(data)._from_sequence(data._pa_array, dtype=data.dtype)
339339
tm.assert_extension_array_equal(result, data)
340340
assert isinstance(result._pa_array, pa.ChunkedArray)
341341

342-
result = type(data)._from_sequence(data._pa_array.combine_chunks())
342+
result = type(data)._from_sequence(
343+
data._pa_array.combine_chunks(), dtype=data.dtype
344+
)
343345
tm.assert_extension_array_equal(result, data)
344346
assert isinstance(result._pa_array, pa.ChunkedArray)
345347

pandas/tests/indexes/timedeltas/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def test_array_of_dt64_nat_raises(self):
3131
TimedeltaIndex(arr)
3232

3333
with pytest.raises(TypeError, match=msg):
34-
TimedeltaArray._from_sequence(arr)
34+
TimedeltaArray._from_sequence(arr, dtype="m8[ns]")
3535

3636
with pytest.raises(TypeError, match=msg):
3737
to_timedelta(arr)

pandas/tests/tools/test_to_timedelta.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def test_to_timedelta_oob_non_nano(self):
9898
TimedeltaIndex(arr)
9999

100100
with pytest.raises(OutOfBoundsTimedelta, match=msg):
101-
TimedeltaArray._from_sequence(arr)
101+
TimedeltaArray._from_sequence(arr, dtype="m8[s]")
102102

103103
@pytest.mark.parametrize(
104104
"arg", [np.arange(10).reshape(2, 5), pd.DataFrame(np.arange(10).reshape(2, 5))]

0 commit comments

Comments
 (0)