diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 39cae5b8e2683..00d46ab9296d0 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2300,7 +2300,13 @@ def _groupby_op( ) if isinstance(result, np.ndarray): return result - return type(self)._from_sequence(result, copy=False) + elif isinstance(result, BaseMaskedArray): + pa_result = result.__arrow_array__() + return type(self)(pa_result) + else: + # DatetimeArray, TimedeltaArray + pa_result = pa.array(result, from_pandas=True) + return type(self)(pa_result) def _apply_elementwise(self, func: Callable) -> list[list[Any]]: """Apply a callable to each element while maintaining the chunking structure.""" diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index fbe1677b95b33..5de5d4ee1856e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1387,7 +1387,7 @@ def __add__(self, other): if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"): from pandas.core.arrays import TimedeltaArray - return TimedeltaArray._from_sequence(result) + return TimedeltaArray._from_sequence(result, dtype=result.dtype) return result def __radd__(self, other): @@ -1447,7 +1447,7 @@ def __sub__(self, other): if isinstance(result, np.ndarray) and lib.is_np_dtype(result.dtype, "m"): from pandas.core.arrays import TimedeltaArray - return TimedeltaArray._from_sequence(result) + return TimedeltaArray._from_sequence(result, dtype=result.dtype) return result def __rsub__(self, other): @@ -1466,7 +1466,7 @@ def __rsub__(self, other): # Avoid down-casting DatetimeIndex from pandas.core.arrays import DatetimeArray - other = DatetimeArray._from_sequence(other) + other = DatetimeArray._from_sequence(other, dtype=other.dtype) return other - self elif self.dtype.kind == "M" and hasattr(other, "dtype") and not other_is_dt64: # GH#19959 datetime - datetime is well-defined as timedelta, diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 201c449185057..43f4428118aa7 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -818,11 +818,7 @@ def _add_offset(self, offset: BaseOffset) -> Self: stacklevel=find_stack_level(), ) res_values = self.astype("O") + offset - # TODO(GH#55564): as_unit will be unnecessary - result = type(self)._from_sequence(res_values).as_unit(self.unit) - if not len(self): - # GH#30336 _from_sequence won't be able to infer self.tz - return result.tz_localize(self.tz) + result = type(self)._from_sequence(res_values, dtype=self.dtype) else: result = type(self)._simple_new(res_values, dtype=res_values.dtype) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index aa8dacbd6aad5..7d0ad74f851f0 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -812,7 +812,7 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray: new_parr = self.asfreq(freq, how=how) new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base) - dta = DatetimeArray._from_sequence(new_data) + dta = DatetimeArray._from_sequence(new_data, dtype=np.dtype("M8[ns]")) if self.freq.name == "B": # See if we can retain BDay instead of Day in cases where diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py index d7264c002c67f..74cc3e991bb76 100644 --- a/pandas/tests/arrays/datetimes/test_constructors.py +++ b/pandas/tests/arrays/datetimes/test_constructors.py @@ -28,10 +28,12 @@ def test_mixing_naive_tzaware_raises(self, meth): # GH#24569 arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]) - msg = ( - "Cannot mix tz-aware with tz-naive values|" - "Tz-aware datetime.datetime cannot be converted " - "to datetime64 unless utc=True" + msg = "|".join( + [ + "Cannot mix tz-aware with tz-naive values", + "Tz-aware datetime.datetime cannot be converted " + "to datetime64 unless utc=True", + ] ) for obj in [arr, arr[::-1]]: @@ -63,10 +65,10 @@ def test_bool_dtype_raises(self): def test_copy(self): data = np.array([1, 2, 3], dtype="M8[ns]") - arr = DatetimeArray._from_sequence(data, copy=False) + arr = DatetimeArray._from_sequence(data, dtype=data.dtype, copy=False) assert arr._ndarray is data - arr = DatetimeArray._from_sequence(data, copy=True) + arr = DatetimeArray._from_sequence(data, dtype=data.dtype, copy=True) assert arr._ndarray is not data def test_numpy_datetime_unit(self, unit): @@ -163,7 +165,9 @@ def test_from_arrow_from_empty(unit, tz): dtype = DatetimeTZDtype(unit=unit, tz=tz) result = dtype.__from_arrow__(arr) - expected = DatetimeArray._from_sequence(np.array(data, dtype=f"datetime64[{unit}]")) + expected = DatetimeArray._from_sequence( + np.array(data, dtype=f"datetime64[{unit}]"), dtype=np.dtype(f"M8[{unit}]") + ) expected = expected.tz_localize(tz=tz) tm.assert_extension_array_equal(result, expected) @@ -179,7 +183,9 @@ def test_from_arrow_from_integers(): dtype = DatetimeTZDtype(unit="ns", tz="UTC") result = dtype.__from_arrow__(arr) - expected = DatetimeArray._from_sequence(np.array(data, dtype="datetime64[ns]")) + expected = DatetimeArray._from_sequence( + np.array(data, dtype="datetime64[ns]"), dtype=np.dtype("M8[ns]") + ) expected = expected.tz_localize("UTC") tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index 4070a2844846f..3c0ef1e4d928b 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -370,11 +370,15 @@ def test_array_copy(): ), ( np.array([1, 2], dtype="m8[ns]"), - TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[ns]")), + TimedeltaArray._from_sequence( + np.array([1, 2], dtype="m8[ns]"), dtype=np.dtype("m8[ns]") + ), ), ( np.array([1, 2], dtype="m8[us]"), - TimedeltaArray._from_sequence(np.array([1, 2], dtype="m8[us]")), + TimedeltaArray._from_sequence( + np.array([1, 2], dtype="m8[us]"), dtype=np.dtype("m8[us]") + ), ), # integer ([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")), diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 6dd1ef9d59ab4..0c8eefab95464 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -257,7 +257,8 @@ def test_fillna_method_doesnt_change_orig(self, method): if self.array_cls is PeriodArray: arr = self.array_cls(data, dtype="period[D]") else: - arr = self.array_cls._from_sequence(data) + dtype = "M8[ns]" if self.array_cls is DatetimeArray else "m8[ns]" + arr = self.array_cls._from_sequence(data, dtype=np.dtype(dtype)) arr[4] = NaT fill_value = arr[3] if method == "pad" else arr[5] @@ -273,7 +274,8 @@ def test_searchsorted(self): if self.array_cls is PeriodArray: arr = self.array_cls(data, dtype="period[D]") else: - arr = self.array_cls._from_sequence(data) + dtype = "M8[ns]" if self.array_cls is DatetimeArray else "m8[ns]" + arr = self.array_cls._from_sequence(data, dtype=np.dtype(dtype)) # scalar result = arr.searchsorted(arr[1]) @@ -739,10 +741,10 @@ def test_array_i8_dtype(self, arr1d): def test_from_array_keeps_base(self): # Ensure that DatetimeArray._ndarray.base isn't lost. arr = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]") - dta = DatetimeArray._from_sequence(arr) + dta = DatetimeArray._from_sequence(arr, dtype=arr.dtype) assert dta._ndarray is arr - dta = DatetimeArray._from_sequence(arr[:0]) + dta = DatetimeArray._from_sequence(arr[:0], dtype=arr.dtype) assert dta._ndarray.base is arr def test_from_dti(self, arr1d): diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 8e348805de978..e3f49d04a0ff2 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -499,7 +499,7 @@ def test_value_counts_preserves_tz(self): @pytest.mark.parametrize("method", ["pad", "backfill"]) def test_fillna_preserves_tz(self, method): dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central") - arr = DatetimeArray._from_sequence(dti, copy=True) + arr = DatetimeArray._from_sequence(dti, dtype=dti.dtype, copy=True) arr[2] = pd.NaT fill_val = dti[1] if method == "pad" else dti[3] @@ -665,7 +665,9 @@ def test_shift_fill_value(self): dti = pd.date_range("2016-01-01", periods=3) dta = dti._data - expected = DatetimeArray._from_sequence(np.roll(dta._ndarray, 1)) + expected = DatetimeArray._from_sequence( + np.roll(dta._ndarray, 1), dtype=dti.dtype + ) fv = dta[-1] for fill_value in [fv, fv.to_pydatetime(), fv.to_datetime64()]: @@ -731,7 +733,11 @@ def test_iter_zoneinfo_fold(self, tz): ) utc_vals *= 1_000_000_000 - dta = DatetimeArray._from_sequence(utc_vals).tz_localize("UTC").tz_convert(tz) + dta = ( + DatetimeArray._from_sequence(utc_vals, dtype=np.dtype("M8[ns]")) + .tz_localize("UTC") + .tz_convert(tz) + ) left = dta[2] right = list(dta)[2] diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index bcc52f197ee51..fb7c7afdc6ff9 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -263,10 +263,10 @@ def test_searchsorted_invalid_types(self, other, index): class TestUnaryOps: def test_abs(self): vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]") - arr = TimedeltaArray._from_sequence(vals) + arr = TimedeltaArray._from_sequence(vals, dtype=vals.dtype) evals = np.array([3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]") - expected = TimedeltaArray._from_sequence(evals) + expected = TimedeltaArray._from_sequence(evals, dtype=evals.dtype) result = abs(arr) tm.assert_timedelta_array_equal(result, expected) @@ -276,7 +276,7 @@ def test_abs(self): def test_pos(self): vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]") - arr = TimedeltaArray._from_sequence(vals) + arr = TimedeltaArray._from_sequence(vals, dtype=vals.dtype) result = +arr tm.assert_timedelta_array_equal(result, arr) @@ -288,7 +288,7 @@ def test_pos(self): def test_neg(self): vals = np.array([-3600 * 10**9, "NaT", 7200 * 10**9], dtype="m8[ns]") - arr = TimedeltaArray._from_sequence(vals) + arr = TimedeltaArray._from_sequence(vals, dtype=vals.dtype) evals = np.array([3600 * 10**9, "NaT", -7200 * 10**9], dtype="m8[ns]") expected = TimedeltaArray._from_sequence(evals) diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 13a3ff048c79e..d8af7abe83084 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -333,7 +333,8 @@ def test_array_multiindex_raises(): # Timedelta ( TimedeltaArray._from_sequence( - np.array([0, 3600000000000], dtype="i8").view("m8[ns]") + np.array([0, 3600000000000], dtype="i8").view("m8[ns]"), + dtype=np.dtype("m8[ns]"), ), np.array([0, 3600000000000], dtype="m8[ns]"), ), diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 261f86bfb0326..2b90886a8d070 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -20,8 +20,8 @@ class TestABCClasses: df = pd.DataFrame({"names": ["a", "b", "c"]}, index=multi_index) sparse_array = pd.arrays.SparseArray(np.random.default_rng(2).standard_normal(10)) - datetime_array = pd.core.arrays.DatetimeArray._from_sequence(datetime_index) - timedelta_array = pd.core.arrays.TimedeltaArray._from_sequence(timedelta_index) + datetime_array = datetime_index.array + timedelta_array = timedelta_index.array abc_pairs = [ ("ABCMultiIndex", multi_index), diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index dd2ed0bd62a02..fd9fec0cb490c 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -549,7 +549,7 @@ def _test_searchsorted_bool_dtypes(self, data_for_sorting, as_series): dtype = data_for_sorting.dtype data_for_sorting = pd.array([True, False], dtype=dtype) b, a = data_for_sorting - arr = type(data_for_sorting)._from_sequence([a, b]) + arr = type(data_for_sorting)._from_sequence([a, b], dtype=dtype) if as_series: arr = pd.Series(arr) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 9b9a8ea3600ae..885adb3543b46 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -790,7 +790,8 @@ def test_end_time_timevalues(self, input_vals): # GH#17157 # Check that the time part of the Period is adjusted by end_time # when using the dt accessor on a Series - input_vals = PeriodArray._from_sequence(np.asarray(input_vals)) + dtype = pd.PeriodDtype(input_vals[0].freq) + input_vals = PeriodArray._from_sequence(np.asarray(input_vals), dtype=dtype) ser = Series(input_vals) result = ser.dt.end_time