Skip to content

REF: pass dtype explicitly to _from_sequence #59774

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits on Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2300,7 +2300,13 @@ def _groupby_op(
)
if isinstance(result, np.ndarray):
return result
return type(self)._from_sequence(result, copy=False)
elif isinstance(result, BaseMaskedArray):
pa_result = result.__arrow_array__()
return type(self)(pa_result)
else:
# DatetimeArray, TimedeltaArray
pa_result = pa.array(result, from_pandas=True)
return type(self)(pa_result)

def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
"""Apply a callable to each element while maintaining the chunking structure."""
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1387,7 +1387,7 @@ def __add__(self, other):
if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"):
from pandas.core.arrays import TimedeltaArray

return TimedeltaArray._from_sequence(result)
return TimedeltaArray._from_sequence(result, dtype=result.dtype)
return result

def __radd__(self, other):
Expand Down Expand Up @@ -1447,7 +1447,7 @@ def __sub__(self, other):
if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"):
from pandas.core.arrays import TimedeltaArray

return TimedeltaArray._from_sequence(result)
return TimedeltaArray._from_sequence(result, dtype=result.dtype)
return result

def __rsub__(self, other):
Expand All @@ -1466,7 +1466,7 @@ def __rsub__(self, other):
# Avoid down-casting DatetimeIndex
from pandas.core.arrays import DatetimeArray

other = DatetimeArray._from_sequence(other)
other = DatetimeArray._from_sequence(other, dtype=other.dtype)
return other - self
elif self.dtype.kind == "M" and hasattr(other, "dtype") and not other_is_dt64:
# GH#19959 datetime - datetime is well-defined as timedelta,
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,11 +818,7 @@ def _add_offset(self, offset: BaseOffset) -> Self:
stacklevel=find_stack_level(),
)
res_values = self.astype("O") + offset
# TODO(GH#55564): as_unit will be unnecessary
result = type(self)._from_sequence(res_values).as_unit(self.unit)
if not len(self):
# GH#30336 _from_sequence won't be able to infer self.tz
return result.tz_localize(self.tz)
result = type(self)._from_sequence(res_values, dtype=self.dtype)

else:
result = type(self)._simple_new(res_values, dtype=res_values.dtype)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,7 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
new_parr = self.asfreq(freq, how=how)

new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base)
dta = DatetimeArray._from_sequence(new_data)
dta = DatetimeArray._from_sequence(new_data, dtype=np.dtype("M8[ns]"))

if self.freq.name == "B":
# See if we can retain BDay instead of Day in cases where
Expand Down
22 changes: 14 additions & 8 deletions pandas/tests/arrays/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ def test_mixing_naive_tzaware_raises(self, meth):
# GH#24569
arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")])

msg = (
"Cannot mix tz-aware with tz-naive values|"
"Tz-aware datetime.datetime cannot be converted "
"to datetime64 unless utc=True"
msg = "|".join(
[
"Cannot mix tz-aware with tz-naive values",
"Tz-aware datetime.datetime cannot be converted "
"to datetime64 unless utc=True",
]
)

for obj in [arr, arr[::-1]]:
Expand Down Expand Up @@ -63,10 +65,10 @@ def test_bool_dtype_raises(self):

def test_copy(self):
data = np.array([1, 2, 3], dtype="M8[ns]")
arr = DatetimeArray._from_sequence(data, copy=False)
arr = DatetimeArray._from_sequence(data, dtype=data.dtype, copy=False)
assert arr._ndarray is data

arr = DatetimeArray._from_sequence(data, copy=True)
arr = DatetimeArray._from_sequence(data, dtype=data.dtype, copy=True)
assert arr._ndarray is not data

def test_numpy_datetime_unit(self, unit):
Expand Down Expand Up @@ -163,7 +165,9 @@ def test_from_arrow_from_empty(unit, tz):
dtype = DatetimeTZDtype(unit=unit, tz=tz)

result = dtype.__from_arrow__(arr)
expected = DatetimeArray._from_sequence(np.array(data, dtype=f"datetime64[{unit}]"))
expected = DatetimeArray._from_sequence(
np.array(data, dtype=f"datetime64[{unit}]"), dtype=np.dtype(f"M8[{unit}]")
)
expected = expected.tz_localize(tz=tz)
tm.assert_extension_array_equal(result, expected)

Expand All @@ -179,7 +183,9 @@ def test_from_arrow_from_integers():
dtype = DatetimeTZDtype(unit="ns", tz="UTC")

result = dtype.__from_arrow__(arr)
expected = DatetimeArray._from_sequence(np.array(data, dtype="datetime64[ns]"))
expected = DatetimeArray._from_sequence(
np.array(data, dtype="datetime64[ns]"), dtype=np.dtype("M8[ns]")
)
expected = expected.tz_localize("UTC")
tm.assert_extension_array_equal(result, expected)

Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/arrays/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,11 +370,15 @@ def test_array_copy():
),
(
np.array([1, 2], dtype="m8[ns]"),
TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[ns]")),
TimedeltaArray._from_sequence(
np.array([1, 2], dtype="m8[ns]"), dtype=np.dtype("m8[ns]")
),
),
(
np.array([1, 2], dtype="m8[us]"),
TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[us]")),
TimedeltaArray._from_sequence(
np.array([1, 2], dtype="m8[us]"), dtype=np.dtype("m8[us]")
),
),
# integer
([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")),
Expand Down
10 changes: 6 additions & 4 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,8 @@ def test_fillna_method_doesnt_change_orig(self, method):
if self.array_cls is PeriodArray:
arr = self.array_cls(data, dtype="period[D]")
else:
arr = self.array_cls._from_sequence(data)
dtype = "M8[ns]" if self.array_cls is DatetimeArray else "m8[ns]"
arr = self.array_cls._from_sequence(data, dtype=np.dtype(dtype))
arr[4] = NaT

fill_value = arr[3] if method == "pad" else arr[5]
Expand All @@ -273,7 +274,8 @@ def test_searchsorted(self):
if self.array_cls is PeriodArray:
arr = self.array_cls(data, dtype="period[D]")
else:
arr = self.array_cls._from_sequence(data)
dtype = "M8[ns]" if self.array_cls is DatetimeArray else "m8[ns]"
arr = self.array_cls._from_sequence(data, dtype=np.dtype(dtype))

# scalar
result = arr.searchsorted(arr[1])
Expand Down Expand Up @@ -739,10 +741,10 @@ def test_array_i8_dtype(self, arr1d):
def test_from_array_keeps_base(self):
# Ensure that DatetimeArray._ndarray.base isn't lost.
arr = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
dta = DatetimeArray._from_sequence(arr)
dta = DatetimeArray._from_sequence(arr, dtype=arr.dtype)

assert dta._ndarray is arr
dta = DatetimeArray._from_sequence(arr[:0])
dta = DatetimeArray._from_sequence(arr[:0], dtype=arr.dtype)
assert dta._ndarray.base is arr

def test_from_dti(self, arr1d):
Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def test_value_counts_preserves_tz(self):
@pytest.mark.parametrize("method", ["pad", "backfill"])
def test_fillna_preserves_tz(self, method):
dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central")
arr = DatetimeArray._from_sequence(dti, copy=True)
arr = DatetimeArray._from_sequence(dti, dtype=dti.dtype, copy=True)
arr[2] = pd.NaT

fill_val = dti[1] if method == "pad" else dti[3]
Expand Down Expand Up @@ -665,7 +665,9 @@ def test_shift_fill_value(self):
dti = pd.date_range("2016-01-01", periods=3)

dta = dti._data
expected = DatetimeArray._from_sequence(np.roll(dta._ndarray, 1))
expected = DatetimeArray._from_sequence(
np.roll(dta._ndarray, 1), dtype=dti.dtype
)

fv = dta[-1]
for fill_value in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
Expand Down Expand Up @@ -731,7 +733,11 @@ def test_iter_zoneinfo_fold(self, tz):
)
utc_vals *= 1_000_000_000

dta = DatetimeArray._from_sequence(utc_vals).tz_localize("UTC").tz_convert(tz)
dta = (
DatetimeArray._from_sequence(utc_vals, dtype=np.dtype("M8[ns]"))
.tz_localize("UTC")
.tz_convert(tz)
)

left = dta[2]
right = list(dta)[2]
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,10 @@ def test_searchsorted_invalid_types(self, other, index):
class TestUnaryOps:
def test_abs(self):
vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
arr = TimedeltaArray._from_sequence(vals)
arr = TimedeltaArray._from_sequence(vals, dtype=vals.dtype)

evals = np.array([3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
expected = TimedeltaArray._from_sequence(evals)
expected = TimedeltaArray._from_sequence(evals, dtype=evals.dtype)

result = abs(arr)
tm.assert_timedelta_array_equal(result, expected)
Expand All @@ -276,7 +276,7 @@ def test_abs(self):

def test_pos(self):
vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
arr = TimedeltaArray._from_sequence(vals)
arr = TimedeltaArray._from_sequence(vals, dtype=vals.dtype)

result = +arr
tm.assert_timedelta_array_equal(result, arr)
Expand All @@ -288,7 +288,7 @@ def test_pos(self):

def test_neg(self):
vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]")
arr = TimedeltaArray._from_sequence(vals)
arr = TimedeltaArray._from_sequence(vals, dtype=vals.dtype)

evals = np.array([3600 * 10**9, "NaT", -7200 * 10**9], dtype="m8[ns]")
expected = TimedeltaArray._from_sequence(evals)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/base/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,8 @@ def test_array_multiindex_raises():
# Timedelta
(
TimedeltaArray._from_sequence(
np.array([0, 3600000000000], dtype="i8").view("m8[ns]")
np.array([0, 3600000000000], dtype="i8").view("m8[ns]"),
dtype=np.dtype("m8[ns]"),
),
np.array([0, 3600000000000], dtype="m8[ns]"),
),
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/dtypes/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class TestABCClasses:
df = pd.DataFrame({"names": ["a", "b", "c"]}, index=multi_index)
sparse_array = pd.arrays.SparseArray(np.random.default_rng(2).standard_normal(10))

datetime_array = pd.core.arrays.DatetimeArray._from_sequence(datetime_index)
timedelta_array = pd.core.arrays.TimedeltaArray._from_sequence(timedelta_index)
datetime_array = datetime_index.array
timedelta_array = timedelta_index.array

abc_pairs = [
("ABCMultiIndex", multi_index),
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ def _test_searchsorted_bool_dtypes(self, data_for_sorting, as_series):
dtype = data_for_sorting.dtype
data_for_sorting = pd.array([True, False], dtype=dtype)
b, a = data_for_sorting
arr = type(data_for_sorting)._from_sequence([a, b])
arr = type(data_for_sorting)._from_sequence([a, b], dtype=dtype)

if as_series:
arr = pd.Series(arr)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/series/accessors/test_dt_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -790,7 +790,8 @@ def test_end_time_timevalues(self, input_vals):
# GH#17157
# Check that the time part of the Period is adjusted by end_time
# when using the dt accessor on a Series
input_vals = PeriodArray._from_sequence(np.asarray(input_vals))
dtype = pd.PeriodDtype(input_vals[0].freq)
input_vals = PeriodArray._from_sequence(np.asarray(input_vals), dtype=dtype)

ser = Series(input_vals)
result = ser.dt.end_time
Expand Down
Loading