CLN: assorted #54673

Merged: 2 commits, Aug 22, 2023
3 changes: 1 addition & 2 deletions pandas/core/array_algos/take.py
@@ -66,8 +66,7 @@ def take_nd(
"""
Specialized Cython take which sets NaN values in one pass

This dispatches to ``take`` defined on ExtensionArrays. It does not
currently dispatch to ``SparseArray.take`` for sparse ``arr``.
This dispatches to ``take`` defined on ExtensionArrays.

Note: this function assumes that the indexer is a valid(ated) indexer with
no out of bound indices.
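
A quick illustration of the dispatch target named in the docstring; this is the public ``ExtensionArray.take`` API, not code from this PR:

    import numpy as np
    import pandas as pd

    # take() fills positions marked -1 when allow_fill=True, which is
    # the behavior take_nd relies on for extension dtypes.
    arr = pd.array([1, 2, 3], dtype="Int64")
    print(arr.take(np.array([0, -1, 2]), allow_fill=True))  # [1, <NA>, 3]
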
1 change: 1 addition & 0 deletions pandas/core/arrays/base.py
@@ -2042,6 +2042,7 @@ def _where(self, mask: npt.NDArray[np.bool_], value) -> Self:
result[~mask] = val
return result

# TODO(3.0): this can be removed once GH#33302 deprecation is enforced
def _fill_mask_inplace(
self, method: str, limit: int | None, mask: npt.NDArray[np.bool_]
) -> None:
10 changes: 4 additions & 6 deletions pandas/core/arrays/categorical.py
@@ -2898,17 +2898,15 @@ def _delegate_method(self, name: str, *args, **kwargs):
# utility routines


def _get_codes_for_values(values, categories: Index) -> np.ndarray:
def _get_codes_for_values(
values: Index | Series | ExtensionArray | np.ndarray,
categories: Index,
) -> np.ndarray:
"""
utility routine to turn values into codes given the specified categories

If `values` is known to be a Categorical, use recode_for_categories instead.
"""
if values.ndim > 1:
flat = values.ravel()
codes = _get_codes_for_values(flat, categories)
return codes.reshape(values.shape)

codes = categories.get_indexer_for(values)
return coerce_indexer_dtype(codes, categories)

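
For reference, the mapping this helper computes can be reproduced with public calls; a minimal sketch using standard pandas API:

    import pandas as pd

    # Each value maps to its position among the categories; misses get -1.
    categories = pd.Index(["a", "b", "c"])
    print(categories.get_indexer_for(["b", "a", "z"]))  # [ 1  0 -1]
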
1 change: 1 addition & 0 deletions pandas/core/config_init.py
@@ -420,6 +420,7 @@ def is_terminal() -> bool:


def use_inf_as_na_cb(key) -> None:
# TODO(3.0): enforcing this deprecation will close GH#52501
from pandas.core.dtypes.missing import _use_inf_as_na

_use_inf_as_na(key)
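
The option this callback re-applies is the already-deprecated ``use_inf_as_na`` mode; a sketch of its effect, assuming a pandas 2.x install where the option still exists:

    import numpy as np
    import pandas as pd

    # With the option on, inf/-inf count as missing; recent 2.x versions
    # also emit a FutureWarning here (the deprecation GH#52501 tracks).
    with pd.option_context("mode.use_inf_as_na", True):
        print(pd.Series([1.0, np.inf]).isna())  # 0: False, 1: True
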
2 changes: 0 additions & 2 deletions pandas/core/dtypes/cast.py
@@ -1707,8 +1707,6 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
arr._validate_setitem_value(element)
return True
except (ValueError, TypeError):
# TODO: re-use _catch_deprecated_value_error to ensure we are
# strict about what exceptions we allow through here.
return False

# This is technically incorrect, but maintains the behavior of
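
``can_hold_element`` is internal but importable; a small sketch of what the try/except above decides (internal API, subject to change):

    import numpy as np
    from pandas.core.dtypes.cast import can_hold_element

    # True when `element` fits the array's dtype without casting.
    arr = np.array([1, 2, 3], dtype=np.int64)
    print(can_hold_element(arr, 4))    # True
    print(can_hold_element(arr, 4.5))  # False: would need a float upcast
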
1 change: 1 addition & 0 deletions pandas/core/dtypes/dtypes.py
@@ -985,6 +985,7 @@ def __new__(cls, freq):

if isinstance(freq, BDay):
# GH#53446
# TODO(3.0): enforcing this will close GH#10575
warnings.warn(
"PeriodDtype[B] is deprecated and will be removed in a future "
"version. Use a DatetimeIndex with freq='B' instead",
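
What the deprecation looks like from the user side, plus the suggested replacement; a sketch against pandas 2.1 (exact message text may vary):

    import warnings
    import pandas as pd

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        pd.PeriodDtype("B")  # hits the BDay branch above
        assert issubclass(w[-1].category, FutureWarning)

    # The replacement the message recommends:
    idx = pd.date_range("2023-08-21", periods=3, freq="B")
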
5 changes: 3 additions & 2 deletions pandas/core/generic.py
@@ -255,8 +255,6 @@ class NDFrame(PandasObject, indexing.IndexingMixin):
"_is_copy",
"_name",
"_metadata",
"__array_struct__",
"__array_interface__",
"_flags",
]
_internal_names_set: set[str] = set(_internal_names)
@@ -6970,6 +6968,9 @@ def _pad_or_backfill(
method = clean_fill_method(method)

if not self._mgr.is_single_block and axis == 1:
# e.g. test_align_fill_method
# TODO(3.0): once downcast is removed, we can do the .T
# in all axis=1 cases, and remove axis kwarg from mgr.pad_or_backfill.
if inplace:
raise NotImplementedError()
result = self.T._pad_or_backfill(method=method, limit=limit).T
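
A user-level sketch of the path being annotated: a frame holding more than one block (here int64 plus float64 columns) padded along axis=1 takes the transpose-pad-transpose route:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [np.nan, 4.0]})
    print(df.ffill(axis=1))  # row 0 fills "b" from "a"
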
2 changes: 2 additions & 0 deletions pandas/core/groupby/grouper.py
@@ -441,6 +441,8 @@ def indexer(self):
@final
@property
def obj(self):
# TODO(3.0): enforcing these deprecations on Grouper should close
# GH#25564, GH#41930
warnings.warn(
f"{type(self).__name__}.obj is deprecated and will be removed "
"in a future version. Use GroupBy.indexer instead.",
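
A sketch of triggering the deprecated attribute; this assumes the Grouper has first been bound by a groupby call (pandas 2.1 behavior):

    import pandas as pd

    df = pd.DataFrame({"key": [1, 1, 2], "val": [10, 20, 30]})
    gpr = pd.Grouper(key="key")
    df.groupby(gpr).sum()
    gpr.obj  # emits the FutureWarning shown above
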
40 changes: 18 additions & 22 deletions pandas/core/indexes/base.py
@@ -3782,17 +3782,23 @@ def get_loc(self, key):
self._check_indexing_error(key)
raise

_index_shared_docs[
"get_indexer"
] = """
@final
def get_indexer(
self,
target,
method: ReindexMethod | None = None,
limit: int | None = None,
tolerance=None,
) -> npt.NDArray[np.intp]:
"""
Compute indexer and mask for new index given the current index.

The indexer should be then used as an input to ndarray.take to align the
current data to the new index.

Parameters
----------
target : %(target_klass)s
target : Index
method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
* default: exact matches only.
* pad / ffill: find the PREVIOUS index value if no exact match.
@@ -3819,7 +3825,7 @@ def get_loc(self, key):
Integers from 0 to n - 1 indicating that the index at these
positions matches the corresponding target values. Missing values
in the target are marked by -1.
%(raises_section)s

Notes
-----
Returns -1 for unmatched values, for further explanation see the
@@ -3834,16 +3840,6 @@ def get_loc(self, key):
Notice that the return value is an array of locations in ``index``
and ``x`` is marked by -1, as it is not in ``index``.
"""

@Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
@final
def get_indexer(
self,
target,
method: ReindexMethod | None = None,
limit: int | None = None,
tolerance=None,
) -> npt.NDArray[np.intp]:
method = clean_reindex_fill_method(method)
orig_target = target
target = self._maybe_cast_listlike_indexer(target)
@@ -3898,7 +3894,7 @@ def get_indexer(

return ensure_platform_int(indexer)

pself, ptarget = self._maybe_promote(target)
pself, ptarget = self._maybe_downcast_for_indexing(target)
if pself is not self or ptarget is not target:
return pself.get_indexer(
ptarget, method=method, limit=limit, tolerance=tolerance
@@ -4582,7 +4578,7 @@ def join(

if not self._is_multi and not other._is_multi:
# We have specific handling for MultiIndex below
pself, pother = self._maybe_promote(other)
pself, pother = self._maybe_downcast_for_indexing(other)
if pself is not self or pother is not other:
return pself.join(
pother, how=how, level=level, return_indexers=True, sort=sort
@@ -6046,7 +6042,7 @@ def get_indexer_non_unique(
# that can be matched to Interval scalars.
return self._get_indexer_non_comparable(target, method=None, unique=False)

pself, ptarget = self._maybe_promote(target)
pself, ptarget = self._maybe_downcast_for_indexing(target)
if pself is not self or ptarget is not target:
return pself.get_indexer_non_unique(ptarget)

@@ -6062,8 +6058,8 @@ def get_indexer_non_unique(
# TODO: get_indexer has fastpaths for both Categorical-self and
# Categorical-target. Can we do something similar here?

# Note: _maybe_promote ensures we never get here with MultiIndex
# self and non-Multi target
# Note: _maybe_downcast_for_indexing ensures we never get here
# with MultiIndex self and non-Multi target
tgt_values = target._get_engine_target()
if self._is_multi and target._is_multi:
engine = self._engine
@@ -6237,7 +6233,7 @@ def _index_as_unique(self) -> bool:
_requires_unique_msg = "Reindexing only valid with uniquely valued Index objects"

@final
def _maybe_promote(self, other: Index) -> tuple[Index, Index]:
def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]:
"""
When dealing with an object-dtype Index and a non-object Index, see
if we can upcast the object-dtype one to improve performance.
@@ -6278,7 +6274,7 @@ def _maybe_promote(self, other: Index) -> tuple[Index, Index]:

if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
# Reverse op so we don't need to re-implement on the subclasses
other, self = other._maybe_promote(self)
other, self = other._maybe_downcast_for_indexing(self)

return self, other

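
The example from the ``get_indexer`` docstring above, runnable as-is with the public API:

    import pandas as pd

    index = pd.Index(["c", "a", "b"])
    # Locations of each target label in `index`; "x" is unmatched -> -1.
    print(index.get_indexer(["a", "b", "x"]))  # [ 1  2 -1]
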
26 changes: 3 additions & 23 deletions pandas/core/internals/blocks.py
@@ -27,7 +27,6 @@
BlockValuesRefs,
)
from pandas._libs.missing import NA
from pandas._libs.tslibs import IncompatibleFrequency
from pandas._typing import (
ArrayLike,
AxisInt,
@@ -1731,9 +1730,7 @@ def setitem(self, indexer, value, using_cow: bool = False):

try:
values[indexer] = value
except (ValueError, TypeError) as err:
_catch_deprecated_value_error(err)

except (ValueError, TypeError):
if isinstance(self.dtype, IntervalDtype):
# see TestSetitemFloatIntervalWithIntIntervalValues
nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
@@ -1776,9 +1773,7 @@ def where(

try:
res_values = arr._where(cond, other).T
except (ValueError, TypeError) as err:
_catch_deprecated_value_error(err)

except (ValueError, TypeError):
if self.ndim == 1 or self.shape[0] == 1:
if isinstance(self.dtype, IntervalDtype):
# TestSetitemFloatIntervalWithIntIntervalValues
@@ -1847,9 +1842,7 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]:
try:
# Caller is responsible for ensuring matching lengths
values._putmask(mask, new)
except (TypeError, ValueError) as err:
_catch_deprecated_value_error(err)

except (TypeError, ValueError):
if self.ndim == 1 or self.shape[0] == 1:
if isinstance(self.dtype, IntervalDtype):
# Discussion about what we want to support in the general
@@ -2256,19 +2249,6 @@ def is_view(self) -> bool:
return self.values._ndarray.base is not None


def _catch_deprecated_value_error(err: Exception) -> None:
"""
We catch ValueError for now, but only a specific one raised by DatetimeArray
which will no longer be raised in version 2.0.
"""
if isinstance(err, ValueError):
if isinstance(err, IncompatibleFrequency):
pass
elif "'value.closed' is" in str(err):
# IntervalDtype mismatched 'closed'
pass


class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
"""Block for datetime64[ns], timedelta64[ns]."""

3 changes: 3 additions & 0 deletions pandas/core/series.py
@@ -3985,6 +3985,8 @@ def argsort(
mask = isna(values)

if mask.any():
# TODO(3.0): once this deprecation is enforced we can call
# self.array.argsort directly, which will close GH#43840
warnings.warn(
"The behavior of Series.argsort in the presence of NA values is "
"deprecated. In a future version, NA values will be ordered "
@@ -5199,6 +5201,7 @@ def info(
show_counts=show_counts,
)

# TODO(3.0): this can be removed once GH#33302 deprecation is enforced
def _replace_single(self, to_replace, method: str, inplace: bool, limit):
"""
Replaces values in a Series using the fill method specified when no
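
The deprecated Series.argsort behavior warned about above, sketched against pandas 2.1: the NA position comes back as -1 rather than being ordered last:

    import numpy as np
    import pandas as pd

    ser = pd.Series([3.0, np.nan, 1.0])
    # Emits the FutureWarning above; the NaN slot holds -1.
    print(ser.argsort().to_numpy())  # [ 1 -1  0]
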
8 changes: 8 additions & 0 deletions pandas/tests/arrays/string_/test_string.py
@@ -94,6 +94,14 @@ def test_astype_roundtrip(dtype):
result = casted.astype("datetime64[ns]")
tm.assert_series_equal(result, ser)

# GH#38509 same thing for timedelta64
ser2 = ser - ser.iloc[-1]
casted2 = ser2.astype(dtype)
assert is_dtype_equal(casted2.dtype, dtype)

result2 = casted2.astype(ser2.dtype)
tm.assert_series_equal(result2, ser2)


def test_add(dtype):
a = pd.Series(["a", "b", "c", None, None], dtype=dtype)
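
The shape of the new roundtrip assertion as a standalone sketch (GH#38509; the nullable string dtype stands in for the fixture's ``dtype``):

    import pandas as pd

    ser2 = pd.Series(pd.timedelta_range("1 day", periods=3))
    casted2 = ser2.astype("string")
    result2 = casted2.astype(ser2.dtype)  # back to timedelta64[ns]
    pd.testing.assert_series_equal(result2, ser2)
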
2 changes: 2 additions & 0 deletions pandas/tests/extension/base/reduce.py
@@ -83,6 +83,7 @@ def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
ser = pd.Series(data)

if not self._supports_reduction(ser, op_name):
# TODO: the message being checked here isn't actually checking anything
msg = (
"[Cc]annot perform|Categorical is not ordered for operation|"
"does not support reduction|"
@@ -101,6 +102,7 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
ser = pd.Series(data)

if not self._supports_reduction(ser, op_name):
# TODO: the message being checked here isn't actually checking anything
msg = (
"[Cc]annot perform|Categorical is not ordered for operation|"
"does not support reduction|"
8 changes: 6 additions & 2 deletions pandas/tests/extension/date/array.py
@@ -176,9 +176,13 @@ def isna(self) -> np.ndarray:
@classmethod
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
if isinstance(scalars, dt.date):
pass
raise TypeError
elif isinstance(scalars, DateArray):
pass
if dtype is not None:
return scalars.astype(dtype, copy=copy)
if copy:
return scalars.copy()
return scalars[:]
elif isinstance(scalars, np.ndarray):
scalars = scalars.astype("U10") # 10 chars for yyyy-mm-dd
return DateArray(scalars)
8 changes: 6 additions & 2 deletions pandas/tests/extension/test_sparse.py
@@ -220,8 +220,10 @@ def test_fillna_no_op_returns_copy(self, data, request):
super().test_fillna_no_op_returns_copy(data)

@pytest.mark.xfail(reason="Unsupported")
def test_fillna_series(self):
def test_fillna_series(self, data_missing):
# this one looks doable.
# TODO: this fails bc we do not pass through data_missing. If we did,
# the 0-fill case would xpass
super().test_fillna_series()

def test_fillna_frame(self, data_missing):
@@ -349,7 +351,9 @@ def test_map_raises(self, data, na_action):

class TestCasting(BaseSparseTests, base.BaseCastingTests):
@pytest.mark.xfail(raises=TypeError, reason="no sparse StringDtype")
def test_astype_string(self, data):
def test_astype_string(self, data, nullable_string_dtype):
# TODO: this fails bc we do not pass through nullable_string_dtype;
# If we did, the 0-cases would xpass
super().test_astype_string(data)


2 changes: 1 addition & 1 deletion pandas/tests/extension/test_string.py
@@ -201,7 +201,7 @@ def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):

class Test2DCompat(base.Dim2CompatTests):
@pytest.fixture(autouse=True)
def arrow_not_supported(self, data, request):
def arrow_not_supported(self, data):
if isinstance(data, ArrowStringArray):
pytest.skip(reason="2D support not implemented for ArrowStringArray")

7 changes: 1 addition & 6 deletions pandas/tests/series/methods/test_reindex.py
@@ -25,12 +25,7 @@
def test_reindex(datetime_series, string_series):
identity = string_series.reindex(string_series.index)

# __array_interface__ is not defined for older numpies
# and on some pythons
try:
assert np.may_share_memory(string_series.index, identity.index)
except AttributeError:
pass
assert np.may_share_memory(string_series.index, identity.index)

assert identity.index.is_(string_series.index)
assert identity.index.identical(string_series.index)
2 changes: 1 addition & 1 deletion pandas/tests/series/test_arithmetic.py
@@ -777,7 +777,7 @@ class TestNamePreservation:
@pytest.mark.parametrize("box", [list, tuple, np.array, Index, Series, pd.array])
@pytest.mark.parametrize("flex", [True, False])
def test_series_ops_name_retention(self, flex, box, names, all_binary_operators):
# GH#33930 consistent name renteiton
# GH#33930 consistent name-retention
op = all_binary_operators

left = Series(range(10), name=names[0])