pandas-dev · WillAyd · Dec 10, 2022 · Nov 30, 2022 · Dec 1, 2022 · Dec 1, 2022
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
@@ -666,20 +666,20 @@ cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
 
 
 cdef tzinfo convert_timezone(
-        tzinfo tz_in,
-        tzinfo tz_out,
-        bint found_naive,
-        bint found_tz,
-        bint utc_convert,
+    tzinfo tz_in,
+    tzinfo tz_out,
+    bint found_naive,
+    bint found_tz,
+    bint utc_convert,
 ):
     """
     Validate that ``tz_in`` can be converted/localized to ``tz_out``.
 
     Parameters
     ----------
-    tz_in : tzinfo
+    tz_in : tzinfo or None
         Timezone info of element being processed.
-    tz_out : tzinfo
+    tz_out : tzinfo or None
         Timezone info of output.
     found_naive : bool
         Whether a timezone-naive element has been found so far.

diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py
@@ -531,7 +531,7 @@ def assert_interval_array_equal(
 def assert_period_array_equal(left, right, obj: str = "PeriodArray") -> None:
     _check_isinstance(left, right, PeriodArray)
 
-    assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data")
+    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray")
     assert_attr_equal("freq", left, right, obj=obj)
 
 
@@ -541,7 +541,7 @@ def assert_datetime_array_equal(
     __tracebackhide__ = True
     _check_isinstance(left, right, DatetimeArray)
 
-    assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data")
+    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray")
     if check_freq:
         assert_attr_equal("freq", left, right, obj=obj)
     assert_attr_equal("tz", left, right, obj=obj)
@@ -552,7 +552,7 @@ def assert_timedelta_array_equal(
 ) -> None:
     __tracebackhide__ = True
     _check_isinstance(left, right, TimedeltaArray)
-    assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data")
+    assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray")
     if check_freq:
         assert_attr_equal("freq", left, right, obj=obj)
 

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -1646,13 +1646,11 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
 
 
 class ExtensionArraySupportsAnyAll(ExtensionArray):
-    def any(self, *, skipna: bool = True) -> bool:  # type: ignore[empty-body]
-        # error: Missing return statement
-        pass
+    def any(self, *, skipna: bool = True) -> bool:
+        raise AbstractMethodError(self)
 
-    def all(self, *, skipna: bool = True) -> bool:  # type: ignore[empty-body]
-        # error: Missing return statement
-        pass
+    def all(self, *, skipna: bool = True) -> bool:
+        raise AbstractMethodError(self)
 
 
 class ExtensionOpsMixin:

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -11,7 +11,6 @@
     Literal,
     Sequence,
     TypeVar,
-    Union,
     cast,
     overload,
 )
@@ -511,7 +510,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
             result = self.copy() if copy else self
 
         elif is_categorical_dtype(dtype):
-            dtype = cast("Union[str, CategoricalDtype]", dtype)
+            dtype = cast(CategoricalDtype, dtype)
 
             # GH 10696/18593/18630
             dtype = self.dtype.update_dtype(dtype)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -257,13 +257,6 @@ def _check_compatible_with(self, other: DTScalarOrNaT) -> None:
         """
         raise AbstractMethodError(self)
 
-    # ------------------------------------------------------------------
-    # NDArrayBackedExtensionArray compat
-
-    @cache_readonly
-    def _data(self) -> np.ndarray:
-        return self._ndarray
-
     # ------------------------------------------------------------------
 
     def _box_func(self, x):

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1195,9 +1195,7 @@ def maybe_cast_to_datetime(
 
     # TODO: _from_sequence would raise ValueError in cases where
     #  _ensure_nanosecond_dtype raises TypeError
-    # Incompatible types in assignment (expression has type "Union[dtype[Any],
-    # ExtensionDtype]", variable has type "Optional[dtype[Any]]")
-    dtype = _ensure_nanosecond_dtype(dtype)  # type: ignore[assignment]
+    _ensure_nanosecond_dtype(dtype)
 
     if is_timedelta64_dtype(dtype):
         res = TimedeltaArray._from_sequence(value, dtype=dtype)
@@ -1235,12 +1233,11 @@ def sanitize_to_nanoseconds(values: np.ndarray, copy: bool = False) -> np.ndarra
     return values
 
 
-def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
+def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None:
     """
-    Convert dtypes with granularity less than nanosecond to nanosecond
+    Validate that the resolution of ``dtype`` is supported; raise if it is not.
 
     >>> _ensure_nanosecond_dtype(np.dtype("M8[us]"))
-    dtype('<M8[us]')
 
     >>> _ensure_nanosecond_dtype(np.dtype("M8[D]"))
     Traceback (most recent call last):
@@ -1277,7 +1274,6 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> DtypeObj:
                 f"dtype={dtype} is not supported. Supported resolutions are 's', "
                 "'ms', 'us', and 'ns'"
             )
-    return dtype
 
 
 # TODO: other value-dependent functions to standardize here include

diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py
@@ -18,7 +18,6 @@
 import pandas._libs.missing as libmissing
 from pandas._libs.tslibs import (
     NaT,
-    Period,
     iNaT,
 )
 
@@ -749,10 +748,8 @@ def isna_all(arr: ArrayLike) -> bool:
     if dtype.kind == "f" and isinstance(dtype, np.dtype):
         checker = nan_checker
 
-    elif (
-        (isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"])
-        or isinstance(dtype, DatetimeTZDtype)
-        or dtype.type is Period
+    elif (isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]) or isinstance(
+        dtype, (DatetimeTZDtype, PeriodDtype)
     ):
         # error: Incompatible types in assignment (expression has type
         # "Callable[[Any], Any]", variable has type "ufunc")

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -7475,7 +7475,7 @@ def _cmp_method(self, other, op):
         return self._construct_result(new_data)
 
     def _arith_method(self, other, op):
-        if ops.should_reindex_frame_op(self, other, op, 1, 1, None, None):
+        if ops.should_reindex_frame_op(self, other, op, 1, None, None):
             return ops.frame_arith_method_with_reindex(self, other, op)
 
         axis: Literal[1] = 1  # only relevant for Series other case

@@ -1647,8 +1647,6 @@ def array_func(values: ArrayLike) -> ArrayLike:
 
             return result
 
-        # TypeError -> we may have an exception in trying to aggregate
-        #  continue and exclude the block
         new_mgr = data.grouped_reduce(array_func)
 
         res = self._wrap_agged_manager(new_mgr)

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -841,7 +841,9 @@ def _set_levels(
 
         self._reset_cache()
 
-    def set_levels(self, levels, *, level=None, verify_integrity: bool = True):
+    def set_levels(
+        self, levels, *, level=None, verify_integrity: bool = True
+    ) -> MultiIndex:
         """
-        Set new levels on MultiIndex. Defaults to returning new index.
+        Set new levels on MultiIndex, returning a new index.
 
@@ -856,8 +858,7 @@ def set_levels(self, levels, *, level=None, verify_integrity: bool = True):
 
         Returns
         -------
-        new index (of same type and class...etc) or None
-            The same type as the caller or None if ``inplace=True``.
+        MultiIndex
 
         Examples
         --------

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
@@ -758,9 +758,9 @@ def fast_xs(self, loc: int) -> SingleArrayManager:
             result = dtype.construct_array_type()._from_sequence(values, dtype=dtype)
         # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT
         elif is_datetime64_ns_dtype(dtype):
-            result = DatetimeArray._from_sequence(values, dtype=dtype)._data
+            result = DatetimeArray._from_sequence(values, dtype=dtype)._ndarray
         elif is_timedelta64_ns_dtype(dtype):
-            result = TimedeltaArray._from_sequence(values, dtype=dtype)._data
+            result = TimedeltaArray._from_sequence(values, dtype=dtype)._ndarray
         else:
             result = np.array(values, dtype=dtype)
         return SingleArrayManager([result], [self._axes[1]])

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -2291,6 +2291,6 @@ def external_values(values: ArrayLike) -> ArrayLike:
         # NB: for datetime64tz this is different from np.asarray(values), since
         #  that returns an object-dtype ndarray of Timestamps.
         # Avoid raising in .astype in casting from dt64tz to dt64
-        return values._data
+        return values._ndarray
     else:
         return values
diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
@@ -6,7 +6,10 @@
 from __future__ import annotations
 
 import operator
-from typing import TYPE_CHECKING
+from typing import (
+    TYPE_CHECKING,
+    cast,
+)
 
 import numpy as np
 
@@ -312,7 +315,7 @@ def to_series(right):
 
 
 def should_reindex_frame_op(
-    left: DataFrame, right, op, axis, default_axis, fill_value, level
+    left: DataFrame, right, op, axis: int, fill_value, level
 ) -> bool:
     """
     Check if this is an operation between DataFrames that will need to reindex.
@@ -326,7 +329,7 @@ def should_reindex_frame_op(
     if not isinstance(right, ABCDataFrame):
         return False
 
-    if fill_value is None and level is None and axis is default_axis:
+    if fill_value is None and level is None and axis == 1:
         # TODO: any other cases we should handle here?
 
         # Intersection is always unique so we have to check the unique columns
@@ -411,26 +414,23 @@ def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: AxisInt
 
 def flex_arith_method_FRAME(op):
     op_name = op.__name__.strip("_")
-    default_axis = "columns"
 
     na_op = get_array_op(op)
     doc = make_flex_doc(op_name, "dataframe")
 
     @Appender(doc)
-    def f(self, other, axis=default_axis, level=None, fill_value=None):
+    def f(self, other, axis: Axis = "columns", level=None, fill_value=None):
+        axis = self._get_axis_number(axis) if axis is not None else 1
+        axis = cast(int, axis)
 
-        if should_reindex_frame_op(
-            self, other, op, axis, default_axis, fill_value, level
-        ):
+        if should_reindex_frame_op(self, other, op, axis, fill_value, level):
             return frame_arith_method_with_reindex(self, other, op)
 
         if isinstance(other, ABCSeries) and fill_value is not None:
             # TODO: We could allow this in cases where we end up going
             #  through the DataFrame path
             raise NotImplementedError(f"fill_value {fill_value} not supported.")
 
-        axis = self._get_axis_number(axis) if axis is not None else 1
-
         other = maybe_prepare_scalar_for_op(other, self.shape)
         self, other = align_method_FRAME(self, other, axis, flex=True, level=level)
 
@@ -456,14 +456,13 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
 
 def flex_comp_method_FRAME(op):
     op_name = op.__name__.strip("_")
-    default_axis = "columns"  # because we are "flex"
 
     doc = _flex_comp_doc_FRAME.format(
         op_name=op_name, desc=_op_descriptions[op_name]["desc"]
     )
 
     @Appender(doc)
-    def f(self, other, axis=default_axis, level=None):
+    def f(self, other, axis: Axis = "columns", level=None):
         axis = self._get_axis_number(axis) if axis is not None else 1
 
         self, other = align_method_FRAME(self, other, axis, flex=True, level=level)

diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
@@ -355,7 +355,6 @@ def test_transform_wont_agg_series(string_series, func):
 @pytest.mark.parametrize(
     "op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}]
 )
-@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning")
 def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper):
     # GH 35964
     op = op_wrapper(all_reductions)

diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
@@ -2437,7 +2437,7 @@ def test_dt64arr_addsub_object_dtype_2d():
 
     assert isinstance(result, DatetimeArray)
     assert result.freq is None
-    tm.assert_numpy_array_equal(result._data, expected._data)
+    tm.assert_numpy_array_equal(result._ndarray, expected._ndarray)
 
     with tm.assert_produces_warning(PerformanceWarning):
         # Case where we expect to get a TimedeltaArray back

diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py
@@ -122,10 +122,10 @@ def test_freq_infer_raises(self):
     def test_copy(self):
         data = np.array([1, 2, 3], dtype="M8[ns]")
         arr = DatetimeArray(data, copy=False)
-        assert arr._data is data
+        assert arr._ndarray is data
 
         arr = DatetimeArray(data, copy=True)
-        assert arr._data is not data
+        assert arr._ndarray is not data
 
 
 class TestSequenceToDT64NS:

diff --git a/pandas/tests/arrays/period/test_astype.py b/pandas/tests/arrays/period/test_astype.py
@@ -42,12 +42,12 @@ def test_astype_copies():
     result = arr.astype(np.int64, copy=False)
 
     # Add the `.base`, since we now use `.asi8` which returns a view.
-    # We could maybe override it in PeriodArray to return ._data directly.
-    assert result.base is arr._data
+    # We could maybe override it in PeriodArray to return ._ndarray directly.
+    assert result.base is arr._ndarray
 
     result = arr.astype(np.int64, copy=True)
-    assert result is not arr._data
-    tm.assert_numpy_array_equal(result, arr._data.view("i8"))
+    assert result is not arr._ndarray
+    tm.assert_numpy_array_equal(result, arr._ndarray.view("i8"))
 
 
 def test_astype_categorical():