diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index 8ea70e2694d92..ac674e31586e7 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -66,8 +66,7 @@ def take_nd( """ Specialized Cython take which sets NaN values in one pass - This dispatches to ``take`` defined on ExtensionArrays. It does not - currently dispatch to ``SparseArray.take`` for sparse ``arr``. + This dispatches to ``take`` defined on ExtensionArrays. Note: this function assumes that the indexer is a valid(ated) indexer with no out of bound indices. diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 3a6db34b0e8b5..3827b5b5d40b2 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -2042,6 +2042,7 @@ def _where(self, mask: npt.NDArray[np.bool_], value) -> Self: result[~mask] = val return result + # TODO(3.0): this can be removed once GH#33302 deprecation is enforced def _fill_mask_inplace( self, method: str, limit: int | None, mask: npt.NDArray[np.bool_] ) -> None: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index aefc94ebd665c..dae0fb7782791 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2898,17 +2898,15 @@ def _delegate_method(self, name: str, *args, **kwargs): # utility routines -def _get_codes_for_values(values, categories: Index) -> np.ndarray: +def _get_codes_for_values( + values: Index | Series | ExtensionArray | np.ndarray, + categories: Index, +) -> np.ndarray: """ utility routine to turn values into codes given the specified categories If `values` is known to be a Categorical, use recode_for_categories instead. 
""" - if values.ndim > 1: - flat = values.ravel() - codes = _get_codes_for_values(flat, categories) - return codes.reshape(values.shape) - codes = categories.get_indexer_for(values) return coerce_indexer_dtype(codes, categories) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 27e9bf8958ab0..0e857626b5697 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -420,6 +420,7 @@ def is_terminal() -> bool: def use_inf_as_na_cb(key) -> None: + # TODO(3.0): enforcing this deprecation will close GH#52501 from pandas.core.dtypes.missing import _use_inf_as_na _use_inf_as_na(key) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 657cbce40087a..aa228191adc62 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1707,8 +1707,6 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: arr._validate_setitem_value(element) return True except (ValueError, TypeError): - # TODO: re-use _catch_deprecated_value_error to ensure we are - # strict about what exceptions we allow through here. return False # This is technically incorrect, but maintains the behavior of diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index a9618963e0a51..59939057d4b37 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -985,6 +985,7 @@ def __new__(cls, freq): if isinstance(freq, BDay): # GH#53446 + # TODO(3.0): enforcing this will close GH#10575 warnings.warn( "PeriodDtype[B] is deprecated and will be removed in a future " "version. 
Use a DatetimeIndex with freq='B' instead", diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7da41b890598d..32069575c807b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -255,8 +255,6 @@ class NDFrame(PandasObject, indexing.IndexingMixin): "_is_copy", "_name", "_metadata", - "__array_struct__", - "__array_interface__", "_flags", ] _internal_names_set: set[str] = set(_internal_names) @@ -6970,6 +6968,9 @@ def _pad_or_backfill( method = clean_fill_method(method) if not self._mgr.is_single_block and axis == 1: + # e.g. test_align_fill_method + # TODO(3.0): once downcast is removed, we can do the .T + # in all axis=1 cases, and remove axis keyword from mgr.pad_or_backfill. if inplace: raise NotImplementedError() result = self.T._pad_or_backfill(method=method, limit=limit).T diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index ea92fbae9566d..95cb114c1472a 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -441,6 +441,8 @@ def indexer(self): @final @property def obj(self): + # TODO(3.0): enforcing these deprecations on Grouper should close + # GH#25564, GH#41930 warnings.warn( f"{type(self).__name__}.obj is deprecated and will be removed " "in a future version. Use GroupBy.indexer instead.", diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 796aadf9e4061..93b99b7647fc0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3782,9 +3782,15 @@ def get_loc(self, key): self._check_indexing_error(key) raise - _index_shared_docs[ - "get_indexer" - ] = """ + @final + def get_indexer( + self, + target, + method: ReindexMethod | None = None, + limit: int | None = None, + tolerance=None, + ) -> npt.NDArray[np.intp]: + """ Compute indexer and mask for new index given the current index.
The indexer should be then used as an input to ndarray.take to align the @@ -3792,7 +3798,7 @@ def get_loc(self, key): Parameters ---------- - target : %(target_klass)s + target : Index method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional * default: exact matches only. * pad / ffill: find the PREVIOUS index value if no exact match. @@ -3819,7 +3825,7 @@ def get_loc(self, key): Integers from 0 to n - 1 indicating that the index at these positions matches the corresponding target values. Missing values in the target are marked by -1. - %(raises_section)s + Notes ----- Returns -1 for unmatched values, for further explanation see the @@ -3834,16 +3840,6 @@ def get_loc(self, key): Notice that the return value is an array of locations in ``index`` and ``x`` is marked by -1, as it is not in ``index``. """ - - @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) - @final - def get_indexer( - self, - target, - method: ReindexMethod | None = None, - limit: int | None = None, - tolerance=None, - ) -> npt.NDArray[np.intp]: method = clean_reindex_fill_method(method) orig_target = target target = self._maybe_cast_listlike_indexer(target) @@ -3898,7 +3894,7 @@ def get_indexer( return ensure_platform_int(indexer) - pself, ptarget = self._maybe_promote(target) + pself, ptarget = self._maybe_downcast_for_indexing(target) if pself is not self or ptarget is not target: return pself.get_indexer( ptarget, method=method, limit=limit, tolerance=tolerance @@ -4582,7 +4578,7 @@ def join( if not self._is_multi and not other._is_multi: # We have specific handling for MultiIndex below - pself, pother = self._maybe_promote(other) + pself, pother = self._maybe_downcast_for_indexing(other) if pself is not self or pother is not other: return pself.join( pother, how=how, level=level, return_indexers=True, sort=sort @@ -6046,7 +6042,7 @@ def get_indexer_non_unique( # that can be matched to Interval scalars. 
return self._get_indexer_non_comparable(target, method=None, unique=False) - pself, ptarget = self._maybe_promote(target) + pself, ptarget = self._maybe_downcast_for_indexing(target) if pself is not self or ptarget is not target: return pself.get_indexer_non_unique(ptarget) @@ -6062,8 +6058,8 @@ def get_indexer_non_unique( # TODO: get_indexer has fastpaths for both Categorical-self and # Categorical-target. Can we do something similar here? - # Note: _maybe_promote ensures we never get here with MultiIndex - # self and non-Multi target + # Note: _maybe_downcast_for_indexing ensures we never get here + # with MultiIndex self and non-Multi target tgt_values = target._get_engine_target() if self._is_multi and target._is_multi: engine = self._engine @@ -6237,7 +6233,7 @@ def _index_as_unique(self) -> bool: _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects" @final - def _maybe_promote(self, other: Index) -> tuple[Index, Index]: + def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]: """ When dealing with an object-dtype Index and a non-object Index, see if we can upcast the object-dtype one to improve performance. 
@@ -6278,7 +6274,7 @@ def _maybe_promote(self, other: Index) -> tuple[Index, Index]: if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype): # Reverse op so we dont need to re-implement on the subclasses - other, self = other._maybe_promote(self) + other, self = other._maybe_downcast_for_indexing(self) return self, other diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index eca0df67ff054..ffaeef14e42a5 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -27,7 +27,6 @@ BlockValuesRefs, ) from pandas._libs.missing import NA -from pandas._libs.tslibs import IncompatibleFrequency from pandas._typing import ( ArrayLike, AxisInt, @@ -1731,9 +1730,7 @@ def setitem(self, indexer, value, using_cow: bool = False): try: values[indexer] = value - except (ValueError, TypeError) as err: - _catch_deprecated_value_error(err) - + except (ValueError, TypeError): if isinstance(self.dtype, IntervalDtype): # see TestSetitemFloatIntervalWithIntIntervalValues nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True) @@ -1776,9 +1773,7 @@ def where( try: res_values = arr._where(cond, other).T - except (ValueError, TypeError) as err: - _catch_deprecated_value_error(err) - + except (ValueError, TypeError): if self.ndim == 1 or self.shape[0] == 1: if isinstance(self.dtype, IntervalDtype): # TestSetitemFloatIntervalWithIntIntervalValues @@ -1847,9 +1842,7 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: try: # Caller is responsible for ensuring matching lengths values._putmask(mask, new) - except (TypeError, ValueError) as err: - _catch_deprecated_value_error(err) - + except (TypeError, ValueError): if self.ndim == 1 or self.shape[0] == 1: if isinstance(self.dtype, IntervalDtype): # Discussion about what we want to support in the general @@ -2256,19 +2249,6 @@ def is_view(self) -> bool: return self.values._ndarray.base is not None -def _catch_deprecated_value_error(err: Exception) -> 
None: - """ - We catch ValueError for now, but only a specific one raised by DatetimeArray - which will no longer be raised in version 2.0. - """ - if isinstance(err, ValueError): - if isinstance(err, IncompatibleFrequency): - pass - elif "'value.closed' is" in str(err): - # IntervalDtype mismatched 'closed' - pass - - class DatetimeLikeBlock(NDArrayBackedExtensionBlock): """Block for datetime64[ns], timedelta64[ns].""" diff --git a/pandas/core/series.py b/pandas/core/series.py index 564c799d7ab66..9a934217ed5c1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3985,6 +3985,8 @@ def argsort( mask = isna(values) if mask.any(): + # TODO(3.0): once this deprecation is enforced we can call + # self.array.argsort directly, which will close GH#43840 warnings.warn( "The behavior of Series.argsort in the presence of NA values is " "deprecated. In a future version, NA values will be ordered " @@ -5199,6 +5201,7 @@ def info( show_counts=show_counts, ) + # TODO(3.0): this can be removed once GH#33302 deprecation is enforced def _replace_single(self, to_replace, method: str, inplace: bool, limit): """ Replaces values in a Series using the fill method specified when no diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index de93e89ecacd5..1476ef87f4666 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -94,6 +94,14 @@ def test_astype_roundtrip(dtype): result = casted.astype("datetime64[ns]") tm.assert_series_equal(result, ser) + # GH#38509 same thing for timedelta64 + ser2 = ser - ser.iloc[-1] + casted2 = ser2.astype(dtype) + assert is_dtype_equal(casted2.dtype, dtype) + + result2 = casted2.astype(ser2.dtype) + tm.assert_series_equal(result2, ser2) + def test_add(dtype): a = pd.Series(["a", "b", "c", None, None], dtype=dtype) diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py index 9b56b10681e15..4edbcacffe6af 
100644 --- a/pandas/tests/extension/base/reduce.py +++ b/pandas/tests/extension/base/reduce.py @@ -83,6 +83,7 @@ def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna): ser = pd.Series(data) if not self._supports_reduction(ser, op_name): + # TODO: the message being checked here isn't actually checking anything msg = ( "[Cc]annot perform|Categorical is not ordered for operation|" "does not support reduction|" @@ -101,6 +102,7 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): ser = pd.Series(data) if not self._supports_reduction(ser, op_name): + # TODO: the message being checked here isn't actually checking anything msg = ( "[Cc]annot perform|Categorical is not ordered for operation|" "does not support reduction|" diff --git a/pandas/tests/extension/date/array.py b/pandas/tests/extension/date/array.py index 39accd6d223a7..2306f5974ba18 100644 --- a/pandas/tests/extension/date/array.py +++ b/pandas/tests/extension/date/array.py @@ -176,9 +176,13 @@ def isna(self) -> np.ndarray: @classmethod def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False): if isinstance(scalars, dt.date): - pass + raise TypeError elif isinstance(scalars, DateArray): - pass + if dtype is not None: + return scalars.astype(dtype, copy=copy) + if copy: + return scalars.copy() + return scalars[:] elif isinstance(scalars, np.ndarray): scalars = scalars.astype("U10") # 10 chars for yyyy-mm-dd return DateArray(scalars) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 01448a2f83f75..7330e03a57daf 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -220,8 +220,10 @@ def test_fillna_no_op_returns_copy(self, data, request): super().test_fillna_no_op_returns_copy(data) @pytest.mark.xfail(reason="Unsupported") - def test_fillna_series(self): + def test_fillna_series(self, data_missing): # this one looks doable. 
+ # TODO: this fails bc we do not pass through data_missing. If we did, + # the 0-fill case would xpass super().test_fillna_series() def test_fillna_frame(self, data_missing): @@ -349,7 +351,9 @@ def test_map_raises(self, data, na_action): class TestCasting(BaseSparseTests, base.BaseCastingTests): @pytest.mark.xfail(raises=TypeError, reason="no sparse StringDtype") - def test_astype_string(self, data): + def test_astype_string(self, data, nullable_string_dtype): + # TODO: this fails bc we do not pass through nullable_string_dtype; + # If we did, the 0-cases would xpass super().test_astype_string(data) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 8268de9a47f11..00e5e411bd0ed 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -201,7 +201,7 @@ def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): class Test2DCompat(base.Dim2CompatTests): @pytest.fixture(autouse=True) - def arrow_not_supported(self, data, request): + def arrow_not_supported(self, data): if isinstance(data, ArrowStringArray): pytest.skip(reason="2D support not implemented for ArrowStringArray") diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 2ab1cd13a31d8..bce7d2d554004 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -25,12 +25,7 @@ def test_reindex(datetime_series, string_series): identity = string_series.reindex(string_series.index) - # __array_interface__ is not defined for older numpies - # and on some pythons - try: - assert np.may_share_memory(string_series.index, identity.index) - except AttributeError: - pass + assert np.may_share_memory(string_series.index, identity.index) assert identity.index.is_(string_series.index) assert identity.index.identical(string_series.index) diff --git a/pandas/tests/series/test_arithmetic.py 
b/pandas/tests/series/test_arithmetic.py index 80fd2fd7c0a06..44121cb5f784f 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -777,7 +777,7 @@ class TestNamePreservation: @pytest.mark.parametrize("box", [list, tuple, np.array, Index, Series, pd.array]) @pytest.mark.parametrize("flex", [True, False]) def test_series_ops_name_retention(self, flex, box, names, all_binary_operators): - # GH#33930 consistent name renteiton + # GH#33930 consistent name-retention op = all_binary_operators left = Series(range(10), name=names[0])