Skip to content

Commit a92b919

Browse files
authored
REF: pass dtype explicitly to _from_sequence (pandas-dev#59774)
1 parent a9f76d7 commit a92b919

File tree

12 files changed: +57 -35 lines changed

pandas/core/arrays/arrow/array.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -2300,7 +2300,13 @@ def _groupby_op(
23002300
)
23012301
if isinstance(result, np.ndarray):
23022302
return result
2303-
return type(self)._from_sequence(result, copy=False)
2303+
elif isinstance(result, BaseMaskedArray):
2304+
pa_result = result.__arrow_array__()
2305+
return type(self)(pa_result)
2306+
else:
2307+
# DatetimeArray, TimedeltaArray
2308+
pa_result = pa.array(result, from_pandas=True)
2309+
return type(self)(pa_result)
23042310

23052311
def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
23062312
"""Apply a callable to each element while maintaining the chunking structure."""

pandas/core/arrays/datetimelike.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -1393,7 +1393,7 @@ def __add__(self, other):
13931393
if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"):
13941394
from pandas.core.arrays import TimedeltaArray
13951395

1396-
return TimedeltaArray._from_sequence(result)
1396+
return TimedeltaArray._from_sequence(result, dtype=result.dtype)
13971397
return result
13981398

13991399
def __radd__(self, other):
@@ -1453,7 +1453,7 @@ def __sub__(self, other):
14531453
if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"):
14541454
from pandas.core.arrays import TimedeltaArray
14551455

1456-
return TimedeltaArray._from_sequence(result)
1456+
return TimedeltaArray._from_sequence(result, dtype=result.dtype)
14571457
return result
14581458

14591459
def __rsub__(self, other):
@@ -1472,7 +1472,7 @@ def __rsub__(self, other):
14721472
# Avoid down-casting DatetimeIndex
14731473
from pandas.core.arrays import DatetimeArray
14741474

1475-
other = DatetimeArray._from_sequence(other)
1475+
other = DatetimeArray._from_sequence(other, dtype=other.dtype)
14761476
return other - self
14771477
elif self.dtype.kind == "M" and hasattr(other, "dtype") and not other_is_dt64:
14781478
# GH#19959 datetime - datetime is well-defined as timedelta,

pandas/core/arrays/datetimes.py

+1 -5
Original file line number | Diff line number | Diff line change
@@ -818,11 +818,7 @@ def _add_offset(self, offset: BaseOffset) -> Self:
818818
stacklevel=find_stack_level(),
819819
)
820820
res_values = self.astype("O") + offset
821-
# TODO(GH#55564): as_unit will be unnecessary
822-
result = type(self)._from_sequence(res_values).as_unit(self.unit)
823-
if not len(self):
824-
# GH#30336 _from_sequence won't be able to infer self.tz
825-
return result.tz_localize(self.tz)
821+
result = type(self)._from_sequence(res_values, dtype=self.dtype)
826822

827823
else:
828824
result = type(self)._simple_new(res_values, dtype=res_values.dtype)

pandas/core/arrays/period.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -812,7 +812,7 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
812812
new_parr = self.asfreq(freq, how=how)
813813

814814
new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base)
815-
dta = DatetimeArray._from_sequence(new_data)
815+
dta = DatetimeArray._from_sequence(new_data, dtype=np.dtype("M8[ns]"))
816816

817817
if self.freq.name == "B":
818818
# See if we can retain BDay instead of Day in cases where

pandas/tests/arrays/datetimes/test_constructors.py

+14 -8
Original file line number | Diff line number | Diff line change
@@ -28,10 +28,12 @@ def test_mixing_naive_tzaware_raises(self, meth):
2828
# GH#24569
2929
arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")])
3030

31-
msg = (
32-
"Cannot mix tz-aware with tz-naive values|"
33-
"Tz-aware datetime.datetime cannot be converted "
34-
"to datetime64 unless utc=True"
31+
msg = "|".join(
32+
[
33+
"Cannot mix tz-aware with tz-naive values",
34+
"Tz-aware datetime.datetime cannot be converted "
35+
"to datetime64 unless utc=True",
36+
]
3537
)
3638

3739
for obj in [arr, arr[::-1]]:
@@ -63,10 +65,10 @@ def test_bool_dtype_raises(self):
6365

6466
def test_copy(self):
6567
data = np.array([1, 2, 3], dtype="M8[ns]")
66-
arr = DatetimeArray._from_sequence(data, copy=False)
68+
arr = DatetimeArray._from_sequence(data, dtype=data.dtype, copy=False)
6769
assert arr._ndarray is data
6870

69-
arr = DatetimeArray._from_sequence(data, copy=True)
71+
arr = DatetimeArray._from_sequence(data, dtype=data.dtype, copy=True)
7072
assert arr._ndarray is not data
7173

7274
def test_numpy_datetime_unit(self, unit):
@@ -163,7 +165,9 @@ def test_from_arrow_from_empty(unit, tz):
163165
dtype = DatetimeTZDtype(unit=unit, tz=tz)
164166

165167
result = dtype.__from_arrow__(arr)
166-
expected = DatetimeArray._from_sequence(np.array(data, dtype=f"datetime64[{unit}]"))
168+
expected = DatetimeArray._from_sequence(
169+
np.array(data, dtype=f"datetime64[{unit}]"), dtype=np.dtype(f"M8[{unit}]")
170+
)
167171
expected = expected.tz_localize(tz=tz)
168172
tm.assert_extension_array_equal(result, expected)
169173

@@ -179,7 +183,9 @@ def test_from_arrow_from_integers():
179183
dtype = DatetimeTZDtype(unit="ns", tz="UTC")
180184

181185
result = dtype.__from_arrow__(arr)
182-
expected = DatetimeArray._from_sequence(np.array(data, dtype="datetime64[ns]"))
186+
expected = DatetimeArray._from_sequence(
187+
np.array(data, dtype="datetime64[ns]"), dtype=np.dtype("M8[ns]")
188+
)
183189
expected = expected.tz_localize("UTC")
184190
tm.assert_extension_array_equal(result, expected)
185191

pandas/tests/arrays/test_array.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -370,11 +370,15 @@ def test_array_copy():
370370
),
371371
(
372372
np.array([1, 2], dtype="m8[ns]"),
373-
TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[ns]")),
373+
TimedeltaArray._from_sequence(
374+
np.array([1, 2], dtype="m8[ns]"), dtype=np.dtype("m8[ns]")
375+
),
374376
),
375377
(
376378
np.array([1, 2], dtype="m8[us]"),
377-
TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[us]")),
379+
TimedeltaArray._from_sequence(
380+
np.array([1, 2], dtype="m8[us]"), dtype=np.dtype("m8[us]")
381+
),
378382
),
379383
# integer
380384
([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")),

pandas/tests/arrays/test_datetimelike.py

+6 -4
Original file line number | Diff line number | Diff line change
@@ -257,7 +257,8 @@ def test_fillna_method_doesnt_change_orig(self, method):
257257
if self.array_cls is PeriodArray:
258258
arr = self.array_cls(data, dtype="period[D]")
259259
else:
260-
arr = self.array_cls._from_sequence(data)
260+
dtype = "M8[ns]" if self.array_cls is DatetimeArray else "m8[ns]"
261+
arr = self.array_cls._from_sequence(data, dtype=np.dtype(dtype))
261262
arr[4] = NaT
262263

263264
fill_value = arr[3] if method == "pad" else arr[5]
@@ -273,7 +274,8 @@ def test_searchsorted(self):
273274
if self.array_cls is PeriodArray:
274275
arr = self.array_cls(data, dtype="period[D]")
275276
else:
276-
arr = self.array_cls._from_sequence(data)
277+
dtype = "M8[ns]" if self.array_cls is DatetimeArray else "m8[ns]"
278+
arr = self.array_cls._from_sequence(data, dtype=np.dtype(dtype))
277279

278280
# scalar
279281
result = arr.searchsorted(arr[1])
@@ -739,10 +741,10 @@ def test_array_i8_dtype(self, arr1d):
739741
def test_from_array_keeps_base(self):
740742
# Ensure that DatetimeArray._ndarray.base isn't lost.
741743
arr = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
742-
dta = DatetimeArray._from_sequence(arr)
744+
dta = DatetimeArray._from_sequence(arr, dtype=arr.dtype)
743745

744746
assert dta._ndarray is arr
745-
dta = DatetimeArray._from_sequence(arr[:0])
747+
dta = DatetimeArray._from_sequence(arr[:0], dtype=arr.dtype)
746748
assert dta._ndarray.base is arr
747749

748750
def test_from_dti(self, arr1d):

pandas/tests/arrays/test_datetimes.py

+9 -3
Original file line number | Diff line number | Diff line change
@@ -499,7 +499,7 @@ def test_value_counts_preserves_tz(self):
499499
@pytest.mark.parametrize("method", ["pad", "backfill"])
500500
def test_fillna_preserves_tz(self, method):
501501
dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central")
502-
arr = DatetimeArray._from_sequence(dti, copy=True)
502+
arr = DatetimeArray._from_sequence(dti, dtype=dti.dtype, copy=True)
503503
arr[2] = pd.NaT
504504

505505
fill_val = dti[1] if method == "pad" else dti[3]
@@ -665,7 +665,9 @@ def test_shift_fill_value(self):
665665
dti = pd.date_range("2016-01-01", periods=3)
666666

667667
dta = dti._data
668-
expected = DatetimeArray._from_sequence(np.roll(dta._ndarray, 1))
668+
expected = DatetimeArray._from_sequence(
669+
np.roll(dta._ndarray, 1), dtype=dti.dtype
670+
)
669671

670672
fv = dta[-1]
671673
for fill_value in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
@@ -731,7 +733,11 @@ def test_iter_zoneinfo_fold(self, tz):
731733
)
732734
utc_vals *= 1_000_000_000
733735

734-
dta = DatetimeArray._from_sequence(utc_vals).tz_localize("UTC").tz_convert(tz)
736+
dta = (
737+
DatetimeArray._from_sequence(utc_vals, dtype=np.dtype("M8[ns]"))
738+
.tz_localize("UTC")
739+
.tz_convert(tz)
740+
)
735741

736742
left = dta[2]
737743
right = list(dta)[2]

pandas/tests/arrays/test_timedeltas.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -263,10 +263,10 @@ def test_searchsorted_invalid_types(self, other, index):
263263
class TestUnaryOps:
264264
def test_abs(self):
265265
vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
266-
arr = TimedeltaArray._from_sequence(vals)
266+
arr = TimedeltaArray._from_sequence(vals, dtype=vals.dtype)
267267

268268
evals = np.array([3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
269-
expected = TimedeltaArray._from_sequence(evals)
269+
expected = TimedeltaArray._from_sequence(evals, dtype=evals.dtype)
270270

271271
result = abs(arr)
272272
tm.assert_timedelta_array_equal(result, expected)
@@ -276,7 +276,7 @@ def test_abs(self):
276276

277277
def test_pos(self):
278278
vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
279-
arr = TimedeltaArray._from_sequence(vals)
279+
arr = TimedeltaArray._from_sequence(vals, dtype=vals.dtype)
280280

281281
result = +arr
282282
tm.assert_timedelta_array_equal(result, arr)
@@ -288,7 +288,7 @@ def test_pos(self):
288288

289289
def test_neg(self):
290290
vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
291-
arr = TimedeltaArray._from_sequence(vals)
291+
arr = TimedeltaArray._from_sequence(vals, dtype=vals.dtype)
292292

293293
evals = np.array([3600 * 10**9, "NaT", -7200 * 10**9], dtype="m8[ns]")
294294
expected = TimedeltaArray._from_sequence(evals)

pandas/tests/base/test_conversion.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -333,7 +333,8 @@ def test_array_multiindex_raises():
333333
# Timedelta
334334
(
335335
TimedeltaArray._from_sequence(
336-
np.array([0, 3600000000000], dtype="i8").view("m8[ns]")
336+
np.array([0, 3600000000000], dtype="i8").view("m8[ns]"),
337+
dtype=np.dtype("m8[ns]"),
337338
),
338339
np.array([0, 3600000000000], dtype="m8[ns]"),
339340
),

pandas/tests/dtypes/test_generic.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@ class TestABCClasses:
2020
df = pd.DataFrame({"names": ["a", "b", "c"]}, index=multi_index)
2121
sparse_array = pd.arrays.SparseArray(np.random.default_rng(2).standard_normal(10))
2222

23-
datetime_array = pd.core.arrays.DatetimeArray._from_sequence(datetime_index)
24-
timedelta_array = pd.core.arrays.TimedeltaArray._from_sequence(timedelta_index)
23+
datetime_array = datetime_index.array
24+
timedelta_array = timedelta_index.array
2525

2626
abc_pairs = [
2727
("ABCMultiIndex", multi_index),

pandas/tests/series/accessors/test_dt_accessor.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -790,7 +790,8 @@ def test_end_time_timevalues(self, input_vals):
790790
# GH#17157
791791
# Check that the time part of the Period is adjusted by end_time
792792
# when using the dt accessor on a Series
793-
input_vals = PeriodArray._from_sequence(np.asarray(input_vals))
793+
dtype = pd.PeriodDtype(input_vals[0].freq)
794+
input_vals = PeriodArray._from_sequence(np.asarray(input_vals), dtype=dtype)
794795

795796
ser = Series(input_vals)
796797
result = ser.dt.end_time

0 commit comments

Comments
 (0)