diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 0d7c96a6f2f2b..67fee7c5fbadd 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -121,9 +121,8 @@ cdef class IntervalTree(IntervalMixin): """ if self._na_count > 0: return False - values = [self.right, self.left] - sort_order = np.lexsort(values) + sort_order = self.left_sorter return is_monotonic(sort_order, False)[0] def get_indexer(self, scalar_t[:] target) -> np.ndarray: diff --git a/pandas/_libs/tslibs/offsets.pyi b/pandas/_libs/tslibs/offsets.pyi index eacdf17b0b4d3..f1aca4717665c 100644 --- a/pandas/_libs/tslibs/offsets.pyi +++ b/pandas/_libs/tslibs/offsets.pyi @@ -12,6 +12,7 @@ from typing import ( import numpy as np +from pandas._libs.tslibs.nattype import NaTType from pandas._typing import npt from .timedeltas import Timedelta @@ -51,6 +52,8 @@ class BaseOffset: def __radd__(self, other: _DatetimeT) -> _DatetimeT: ... @overload def __radd__(self, other: _TimedeltaT) -> _TimedeltaT: ... + @overload + def __radd__(self, other: NaTType) -> NaTType: ... def __sub__(self: _BaseOffsetT, other: BaseOffset) -> _BaseOffsetT: ... @overload def __rsub__(self, other: npt.NDArray[np.object_]) -> npt.NDArray[np.object_]: ... diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index a28aace5d2f15..d8bc9363f1a23 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -83,7 +83,7 @@ cdef inline bint is_integer_object(object obj) nogil: cdef inline bint is_float_object(object obj) nogil: """ - Cython equivalent of `isinstance(val, (float, np.complex_))` + Cython equivalent of `isinstance(val, (float, np.float_))` Parameters ---------- diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 87d9c39b0407c..7390b04da4787 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -6,6 +6,7 @@ """ from __future__ import annotations +from typing import final import warnings from pandas.util._decorators import doc @@ -16,6 +17,7 @@ class DirNamesMixin: _accessors: set[str] = set() _hidden_attrs: frozenset[str] = frozenset() + @final def _dir_deletions(self) -> set[str]: """ Delete unwanted __dir__ for this object. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index e9eebf54df07f..adf6522f76a1a 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -189,7 +189,7 @@ class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray): Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray Assumes that __new__/__init__ defines: - _data + _ndarray _freq and that the inheriting class has methods: @@ -1418,9 +1418,8 @@ def __add__(self, other): # as is_integer returns True for these if not is_period_dtype(self.dtype): raise integer_op_not_supported(self) - result = cast("PeriodArray", self)._addsub_int_array_or_scalar( - other * self.freq.n, operator.add - ) + obj = cast("PeriodArray", self) + result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.add) # array-like others elif is_timedelta64_dtype(other_dtype): @@ -1435,9 +1434,8 @@ def __add__(self, other): elif is_integer_dtype(other_dtype): if not is_period_dtype(self.dtype): raise integer_op_not_supported(self) - result = cast("PeriodArray", self)._addsub_int_array_or_scalar( - other * self.freq.n, operator.add - ) + obj = cast("PeriodArray", self) + result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.add) else: # Includes Categorical, other ExtensionArrays # For PeriodDtype, if self is a TimedeltaArray and other is a @@ -1477,9 +1475,8 @@ def __sub__(self, other): # as is_integer returns True for these if not is_period_dtype(self.dtype): raise integer_op_not_supported(self) - result = cast("PeriodArray", self)._addsub_int_array_or_scalar( - other * self.freq.n, operator.sub - ) + obj = cast("PeriodArray", self) + result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.sub) elif isinstance(other, Period): result = self._sub_periodlike(other) @@ -1500,9 +1497,8 @@ def __sub__(self, other): elif is_integer_dtype(other_dtype): if not is_period_dtype(self.dtype): raise integer_op_not_supported(self) - result = cast("PeriodArray", self)._addsub_int_array_or_scalar( - other * self.freq.n, operator.sub - ) + obj = cast("PeriodArray", self) + result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.sub) else: # Includes ExtensionArrays, float_dtype return NotImplemented diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 3c6686b5c0173..f7107a1f7c83c 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -812,6 +812,8 @@ def argsort( ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs) if ascending and kind == "quicksort" and na_position == "last": + # TODO: in an IntervalIndex we can re-use the cached + # IntervalTree.left_sorter return np.lexsort((self.right, self.left)) # TODO: other cases we can use lexsort for? much more performant. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 729f34544e2bc..b823a7a51943e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -32,7 +32,6 @@ class providing the base-class of operations. cast, final, ) -import warnings import numpy as np @@ -2200,13 +2199,8 @@ def sem(self, ddof: int = 1, numeric_only: bool = False): counts = self.count() result_ilocs = result.columns.get_indexer_for(cols) count_ilocs = counts.columns.get_indexer_for(cols) - with warnings.catch_warnings(): - # TODO(2.0): once iloc[:, foo] = bar depecation is enforced, - # this catching will be unnecessary - warnings.filterwarnings( - "ignore", ".*will attempt to set the values inplace.*" - ) - result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) + + result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) return result @final diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index c06b6c7a9a651..b8ef925362e7b 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -863,8 +863,6 @@ def column_setitem( This is a method on the ArrayManager level, to avoid creating an intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) - - """ if not is_integer(loc): raise TypeError("The column index should be an integer") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 713413500f64c..f65722ac9685b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -325,6 +325,7 @@ def apply(self, func, **kwargs) -> list[Block]: return self._split_op_result(result) + @final def reduce(self, func) -> list[Block]: # We will apply the function and reshape the result into a single-row # Block with the same mgr_locs; squeezing will be done at a higher level @@ -1957,19 +1958,6 @@ class ObjectBlock(NumpyBlock): __slots__ = () is_object = True - def reduce(self, func) -> list[Block]: - """ - For object-dtype, we operate column-wise. - """ - assert self.ndim == 2 - - res = func(self.values) - - assert isinstance(res, np.ndarray) - assert res.ndim == 1 - res = res.reshape(-1, 1) - return [self.make_block_same_class(res)] - @maybe_split def convert( self, @@ -1980,7 +1968,9 @@ def convert( attempt to cast any object types to better types return a copy of the block (if copy = True) by definition we ARE an ObjectBlock!!!!! """ - if self.dtype != object: + if self.dtype != _dtype_obj: + # GH#50067 this should be impossible in ObjectBlock, but until + # that is fixed, we short-circuit here. return [self] values = self.values diff --git a/pandas/core/series.py b/pandas/core/series.py index b69fb4c1b58aa..873ebd16ac80b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -856,7 +856,6 @@ def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: # coercion __float__ = _coerce_method(float) - __long__ = _coerce_method(int) __int__ = _coerce_method(int) # ---------------------------------------------------------------------- diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index a97a866a8406e..6ce2ccb3a2925 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -398,10 +398,7 @@ def _convert_listlike_datetimes( elif is_datetime64_ns_dtype(arg_dtype): if not isinstance(arg, (DatetimeArray, DatetimeIndex)): - try: - return DatetimeIndex(arg, tz=tz, name=name) - except ValueError: - pass + return DatetimeIndex(arg, tz=tz, name=name) elif utc: # DatetimeArray, DatetimeIndex return arg.tz_localize("utc") diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index c9bfb5e29460e..4fb63d3c4b97b 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1751,7 +1751,7 @@ def test_td64arr_floordiv_td64arr_with_nat( expected = np.array([1.0, 1.0, np.nan], dtype=np.float64) expected = tm.box_expected(expected, xbox) if box is DataFrame and using_array_manager: - # INFO(ArrayManager) floorfiv returns integer, and ArrayManager + # INFO(ArrayManager) floordiv returns integer, and ArrayManager # performs ops column-wise and thus preserves int64 dtype for # columns without missing values expected[[0, 1]] = expected[[0, 1]].astype("int64") diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c2c1073eef36d..969c6059b8d31 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -891,7 +891,6 @@ def test_isin_nan_common_float64(self, nulls_fixture): "index", [ Index(["qux", "baz", "foo", "bar"]), - # float64 Index overrides isin, so must be checked separately NumericIndex([1.0, 2.0, 3.0, 4.0], dtype=np.float64), ], ) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index ea526c95f20e0..3dc6f2404444b 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -745,7 +745,7 @@ def test_concat_retain_attrs(data): @td.skip_array_manager_invalid_test @pytest.mark.parametrize("df_dtype", ["float64", "int64", "datetime64[ns]"]) @pytest.mark.parametrize("empty_dtype", [None, "float64", "object"]) -def test_concat_ignore_emtpy_object_float(empty_dtype, df_dtype): +def test_concat_ignore_empty_object_float(empty_dtype, df_dtype): # https://github.com/pandas-dev/pandas/issues/45637 df = DataFrame({"foo": [1, 2], "bar": [1, 2]}, dtype=df_dtype) empty = DataFrame(columns=["foo", "bar"], dtype=empty_dtype)