Commit c233649

CLN: assorted (#54673)
* CLN: assorted
* typo fixup
1 parent 2d5589c commit c233649

File tree

18 files changed, +62 −69 lines changed


pandas/core/array_algos/take.py

+1 −2

@@ -66,8 +66,7 @@ def take_nd(
     """
     Specialized Cython take which sets NaN values in one pass

-    This dispatches to ``take`` defined on ExtensionArrays. It does not
-    currently dispatch to ``SparseArray.take`` for sparse ``arr``.
+    This dispatches to ``take`` defined on ExtensionArrays.

     Note: this function assumes that the indexer is a valid(ated) indexer with
     no out of bound indices.
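
For context, the trimmed docstring now describes only the ``ExtensionArray.take`` dispatch. A minimal sketch of that public interface (illustrative only, not part of the diff): with ``allow_fill=True``, positions of ``-1`` in the indexer are filled with ``fill_value`` in one pass.

    import pandas as pd

    # Sketch of the ExtensionArray.take interface that take_nd dispatches to;
    # -1 positions are replaced by fill_value when allow_fill=True.
    arr = pd.array([1, 2, 3], dtype="Int64")
    taken = arr.take([0, -1, 2], allow_fill=True, fill_value=pd.NA)
    print(taken)  # expected: <IntegerArray> [1, <NA>, 3]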

pandas/core/arrays/base.py

+1

@@ -2042,6 +2042,7 @@ def _where(self, mask: npt.NDArray[np.bool_], value) -> Self:
         result[~mask] = val
         return result

+    # TODO(3.0): this can be removed once GH#33302 deprecation is enforced
     def _fill_mask_inplace(
         self, method: str, limit: int | None, mask: npt.NDArray[np.bool_]
     ) -> None:

pandas/core/arrays/categorical.py

+4 −6

@@ -2898,17 +2898,15 @@ def _delegate_method(self, name: str, *args, **kwargs):
 # utility routines


-def _get_codes_for_values(values, categories: Index) -> np.ndarray:
+def _get_codes_for_values(
+    values: Index | Series | ExtensionArray | np.ndarray,
+    categories: Index,
+) -> np.ndarray:
     """
     utility routine to turn values into codes given the specified categories

     If `values` is known to be a Categorical, use recode_for_categories instead.
     """
-    if values.ndim > 1:
-        flat = values.ravel()
-        codes = _get_codes_for_values(flat, categories)
-        return codes.reshape(values.shape)
-
     codes = categories.get_indexer_for(values)
     return coerce_indexer_dtype(codes, categories)
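
The helper now assumes 1-dimensional input and simply defers to ``Index.get_indexer_for``. A rough public-API sketch of what the resulting codes mean (an illustration, not the private helper itself):

    import pandas as pd

    # Codes are the positions of each value within the categories;
    # values not found in the categories map to -1.
    categories = pd.Index(["a", "b", "c"])
    codes = categories.get_indexer_for(["b", "b", "x"])
    print(codes)  # expected: [ 1  1 -1]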

pandas/core/config_init.py

+1

@@ -420,6 +420,7 @@ def is_terminal() -> bool:


 def use_inf_as_na_cb(key) -> None:
+    # TODO(3.0): enforcing this deprecation will close GH#52501
     from pandas.core.dtypes.missing import _use_inf_as_na

     _use_inf_as_na(key)

pandas/core/dtypes/cast.py

−2

@@ -1707,8 +1707,6 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
                 arr._validate_setitem_value(element)
                 return True
             except (ValueError, TypeError):
-                # TODO: re-use _catch_deprecated_value_error to ensure we are
-                # strict about what exceptions we allow through here.
                 return False

     # This is technically incorrect, but maintains the behavior of

pandas/core/dtypes/dtypes.py

+1

@@ -985,6 +985,7 @@ def __new__(cls, freq):

         if isinstance(freq, BDay):
             # GH#53446
+            # TODO(3.0): enforcing this will close GH#10575
             warnings.warn(
                 "PeriodDtype[B] is deprecated and will be removed in a future "
                 "version. Use a DatetimeIndex with freq='B' instead",

pandas/core/generic.py

+3 −2

@@ -255,8 +255,6 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
         "_is_copy",
         "_name",
         "_metadata",
-        "__array_struct__",
-        "__array_interface__",
         "_flags",
     ]
     _internal_names_set: set[str] = set(_internal_names)

@@ -6973,6 +6971,9 @@ def _pad_or_backfill(
         method = clean_fill_method(method)

         if not self._mgr.is_single_block and axis == 1:
+            # e.g. test_align_fill_method
+            # TODO(3.0): once downcast is removed, we can do the .T
+            # in all axis=1 cases, and remove axis kward from mgr.pad_or_backfill.
             if inplace:
                 raise NotImplementedError()
             result = self.T._pad_or_backfill(method=method, limit=limit).T
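
The new comment points at the double-transpose route used for ``axis=1`` on multi-block frames. A rough illustration of that equivalence with the public ``ffill`` API rather than the internal ``_pad_or_backfill`` (assumption: a homogeneous float frame, so transposing does not change dtypes):

    import numpy as np
    import pandas as pd

    # Filling along axis=1 matches transposing, filling along axis=0, and transposing back.
    df = pd.DataFrame({"a": [1.0, np.nan], "b": [np.nan, np.nan], "c": [3.0, np.nan]})
    print(df.ffill(axis=1).equals(df.T.ffill().T))  # expected: True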

pandas/core/groupby/grouper.py

+2

@@ -441,6 +441,8 @@ def indexer(self):
     @final
     @property
     def obj(self):
+        # TODO(3.0): enforcing these deprecations on Grouper should close
+        # GH#25564, GH#41930
         warnings.warn(
             f"{type(self).__name__}.obj is deprecated and will be removed "
             "in a future version. Use GroupBy.indexer instead.",

pandas/core/indexes/base.py

+18 −22

@@ -3782,17 +3782,23 @@ def get_loc(self, key):
             self._check_indexing_error(key)
             raise

-    _index_shared_docs[
-        "get_indexer"
-    ] = """
+    @final
+    def get_indexer(
+        self,
+        target,
+        method: ReindexMethod | None = None,
+        limit: int | None = None,
+        tolerance=None,
+    ) -> npt.NDArray[np.intp]:
+        """
         Compute indexer and mask for new index given the current index.

         The indexer should be then used as an input to ndarray.take to align the
         current data to the new index.

         Parameters
         ----------
-        target : %(target_klass)s
+        target : Index
         method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
             * default: exact matches only.
             * pad / ffill: find the PREVIOUS index value if no exact match.

@@ -3819,7 +3825,7 @@ def get_loc(self, key):
             Integers from 0 to n - 1 indicating that the index at these
             positions matches the corresponding target values. Missing values
             in the target are marked by -1.
-        %(raises_section)s
+
         Notes
         -----
         Returns -1 for unmatched values, for further explanation see the

@@ -3834,16 +3840,6 @@ def get_loc(self, key):
         Notice that the return value is an array of locations in ``index``
         and ``x`` is marked by -1, as it is not in ``index``.
         """
-
-    @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
-    @final
-    def get_indexer(
-        self,
-        target,
-        method: ReindexMethod | None = None,
-        limit: int | None = None,
-        tolerance=None,
-    ) -> npt.NDArray[np.intp]:
         method = clean_reindex_fill_method(method)
         orig_target = target
         target = self._maybe_cast_listlike_indexer(target)

@@ -3898,7 +3894,7 @@ def get_indexer(

             return ensure_platform_int(indexer)

-        pself, ptarget = self._maybe_promote(target)
+        pself, ptarget = self._maybe_downcast_for_indexing(target)
         if pself is not self or ptarget is not target:
             return pself.get_indexer(
                 ptarget, method=method, limit=limit, tolerance=tolerance

@@ -4582,7 +4578,7 @@ def join(

         if not self._is_multi and not other._is_multi:
             # We have specific handling for MultiIndex below
-            pself, pother = self._maybe_promote(other)
+            pself, pother = self._maybe_downcast_for_indexing(other)
             if pself is not self or pother is not other:
                 return pself.join(
                     pother, how=how, level=level, return_indexers=True, sort=sort

@@ -6046,7 +6042,7 @@ def get_indexer_non_unique(
            # that can be matched to Interval scalars.
            return self._get_indexer_non_comparable(target, method=None, unique=False)

-        pself, ptarget = self._maybe_promote(target)
+        pself, ptarget = self._maybe_downcast_for_indexing(target)
         if pself is not self or ptarget is not target:
             return pself.get_indexer_non_unique(ptarget)

@@ -6062,8 +6058,8 @@ def get_indexer_non_unique(
         # TODO: get_indexer has fastpaths for both Categorical-self and
         # Categorical-target. Can we do something similar here?

-        # Note: _maybe_promote ensures we never get here with MultiIndex
-        # self and non-Multi target
+        # Note: _maybe_downcast_for_indexing ensures we never get here
+        # with MultiIndex self and non-Multi target
         tgt_values = target._get_engine_target()
         if self._is_multi and target._is_multi:
             engine = self._engine

@@ -6237,7 +6233,7 @@ def _index_as_unique(self) -> bool:
     _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects"

     @final
-    def _maybe_promote(self, other: Index) -> tuple[Index, Index]:
+    def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]:
         """
         When dealing with an object-dtype Index and a non-object Index, see
         if we can upcast the object-dtype one to improve performance.

@@ -6278,7 +6274,7 @@ def _maybe_promote(self, other: Index) -> tuple[Index, Index]:

         if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
             # Reverse op so we dont need to re-implement on the subclasses
-            other, self = other._maybe_promote(self)
+            other, self = other._maybe_downcast_for_indexing(self)

         return self, other
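
``Index.get_indexer`` keeps the same documented behavior; only the docstring moved inline (the shared-doc/``Appender`` indirection is gone) and ``_maybe_promote`` was renamed to ``_maybe_downcast_for_indexing``. The snippet below simply restates the docstring's own example (illustrative, public API):

    import pandas as pd

    # Unmatched target values map to -1, as the docstring describes.
    index = pd.Index(["c", "a", "b"])
    print(index.get_indexer(["a", "b", "x"]))  # expected: [ 1  2 -1]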

pandas/core/internals/blocks.py

+3 −23

@@ -27,7 +27,6 @@
     BlockValuesRefs,
 )
 from pandas._libs.missing import NA
-from pandas._libs.tslibs import IncompatibleFrequency
 from pandas._typing import (
     ArrayLike,
     AxisInt,

@@ -1731,9 +1730,7 @@ def setitem(self, indexer, value, using_cow: bool = False):

         try:
             values[indexer] = value
-        except (ValueError, TypeError) as err:
-            _catch_deprecated_value_error(err)
-
+        except (ValueError, TypeError):
             if isinstance(self.dtype, IntervalDtype):
                 # see TestSetitemFloatIntervalWithIntIntervalValues
                 nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)

@@ -1776,9 +1773,7 @@ def where(

         try:
             res_values = arr._where(cond, other).T
-        except (ValueError, TypeError) as err:
-            _catch_deprecated_value_error(err)
-
+        except (ValueError, TypeError):
             if self.ndim == 1 or self.shape[0] == 1:
                 if isinstance(self.dtype, IntervalDtype):
                     # TestSetitemFloatIntervalWithIntIntervalValues

@@ -1847,9 +1842,7 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]:
         try:
             # Caller is responsible for ensuring matching lengths
             values._putmask(mask, new)
-        except (TypeError, ValueError) as err:
-            _catch_deprecated_value_error(err)
-
+        except (TypeError, ValueError):
             if self.ndim == 1 or self.shape[0] == 1:
                 if isinstance(self.dtype, IntervalDtype):
                     # Discussion about what we want to support in the general

@@ -2256,19 +2249,6 @@ def is_view(self) -> bool:
         return self.values._ndarray.base is not None


-def _catch_deprecated_value_error(err: Exception) -> None:
-    """
-    We catch ValueError for now, but only a specific one raised by DatetimeArray
-    which will no longer be raised in version 2.0.
-    """
-    if isinstance(err, ValueError):
-        if isinstance(err, IncompatibleFrequency):
-            pass
-        elif "'value.closed' is" in str(err):
-            # IntervalDtype mismatched 'closed'
-            pass
-
-
 class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
     """Block for datetime64[ns], timedelta64[ns]."""
pandas/core/series.py

+3

@@ -3985,6 +3985,8 @@ def argsort(
         mask = isna(values)

         if mask.any():
+            # TODO(3.0): once this deprecation is enforced we can call
+            # self.array.argsort directly, which will close GH#43840
             warnings.warn(
                 "The behavior of Series.argsort in the presence of NA values is "
                 "deprecated. In a future version, NA values will be ordered "

@@ -5199,6 +5201,7 @@ def info(
             show_counts=show_counts,
         )

+    # TODO(3.0): this can be removed once GH#33302 deprecation is enforced
     def _replace_single(self, to_replace, method: str, inplace: bool, limit):
         """
         Replaces values in a Series using the fill method specified when no

pandas/tests/arrays/string_/test_string.py

+8

@@ -94,6 +94,14 @@ def test_astype_roundtrip(dtype):
     result = casted.astype("datetime64[ns]")
     tm.assert_series_equal(result, ser)

+    # GH#38509 same thing for timedelta64
+    ser2 = ser - ser.iloc[-1]
+    casted2 = ser2.astype(dtype)
+    assert is_dtype_equal(casted2.dtype, dtype)
+
+    result2 = casted2.astype(ser2.dtype)
+    tm.assert_series_equal(result2, ser2)
+

 def test_add(dtype):
     a = pd.Series(["a", "b", "c", None, None], dtype=dtype)
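The assertions added above extend the GH#38509 roundtrip check to timedelta data. A short sketch of the behavior being exercised, shown here with the nullable ``"string"`` dtype (illustrative only; the fixture-driven test covers the other string dtypes as well):

    import pandas as pd

    # timedelta64 values should survive a cast to a string dtype and back.
    ser = pd.Series(pd.timedelta_range("1 day", periods=3))
    back = ser.astype("string").astype(ser.dtype)
    print(back.equals(ser))  # expected: True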

pandas/tests/extension/base/reduce.py

+2

@@ -83,6 +83,7 @@ def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
         ser = pd.Series(data)

         if not self._supports_reduction(ser, op_name):
+            # TODO: the message being checked here isn't actually checking anything
             msg = (
                 "[Cc]annot perform|Categorical is not ordered for operation|"
                 "does not support reduction|"

@@ -101,6 +102,7 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
         ser = pd.Series(data)

         if not self._supports_reduction(ser, op_name):
+            # TODO: the message being checked here isn't actually checking anything
             msg = (
                 "[Cc]annot perform|Categorical is not ordered for operation|"
                 "does not support reduction|"

pandas/tests/extension/date/array.py

+6 −2

@@ -176,9 +176,13 @@ def isna(self) -> np.ndarray:
     @classmethod
     def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
         if isinstance(scalars, dt.date):
-            pass
+            raise TypeError
         elif isinstance(scalars, DateArray):
-            pass
+            if dtype is not None:
+                return scalars.astype(dtype, copy=copy)
+            if copy:
+                return scalars.copy()
+            return scalars[:]
         elif isinstance(scalars, np.ndarray):
             scalars = scalars.astype("U10")  # 10 chars for yyyy-mm-dd
             return DateArray(scalars)

pandas/tests/extension/test_sparse.py

+6 −2

@@ -220,8 +220,10 @@ def test_fillna_no_op_returns_copy(self, data, request):
         super().test_fillna_no_op_returns_copy(data)

     @pytest.mark.xfail(reason="Unsupported")
-    def test_fillna_series(self):
+    def test_fillna_series(self, data_missing):
         # this one looks doable.
+        # TODO: this fails bc we do not pass through data_missing. If we did,
+        # the 0-fill case would xpass
         super().test_fillna_series()

     def test_fillna_frame(self, data_missing):

@@ -349,7 +351,9 @@ def test_map_raises(self, data, na_action):

 class TestCasting(BaseSparseTests, base.BaseCastingTests):
     @pytest.mark.xfail(raises=TypeError, reason="no sparse StringDtype")
-    def test_astype_string(self, data):
+    def test_astype_string(self, data, nullable_string_dtype):
+        # TODO: this fails bc we do not pass through nullable_string_dtype;
+        # If we did, the 0-cases would xpass
         super().test_astype_string(data)
pandas/tests/extension/test_string.py

+1 −1

@@ -201,7 +201,7 @@ def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):

 class Test2DCompat(base.Dim2CompatTests):
     @pytest.fixture(autouse=True)
-    def arrow_not_supported(self, data, request):
+    def arrow_not_supported(self, data):
         if isinstance(data, ArrowStringArray):
             pytest.skip(reason="2D support not implemented for ArrowStringArray")

pandas/tests/series/methods/test_reindex.py

+1 −6

@@ -25,12 +25,7 @@
 def test_reindex(datetime_series, string_series):
     identity = string_series.reindex(string_series.index)

-    # __array_interface__ is not defined for older numpies
-    # and on some pythons
-    try:
-        assert np.may_share_memory(string_series.index, identity.index)
-    except AttributeError:
-        pass
+    assert np.may_share_memory(string_series.index, identity.index)

     assert identity.index.is_(string_series.index)
     assert identity.index.identical(string_series.index)

pandas/tests/series/test_arithmetic.py

+1 −1

@@ -777,7 +777,7 @@ class TestNamePreservation:
     @pytest.mark.parametrize("box", [list, tuple, np.array, Index, Series, pd.array])
     @pytest.mark.parametrize("flex", [True, False])
     def test_series_ops_name_retention(self, flex, box, names, all_binary_operators):
-        # GH#33930 consistent name renteiton
+        # GH#33930 consistent name-retention
         op = all_binary_operators

         left = Series(range(10), name=names[0])
