Skip to content

Commit c032845

Browse files
authored
DEPR: value_counts doing dtype inference on result.index (#58009)
1 parent fc4af6a commit c032845

File tree

4 files changed

+10
-43
lines changed

4 files changed

+10
-43
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ Removal of prior version deprecations/changes
203203
- All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`)
204204
- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
205205
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
206+
- Enforced deprecation of :meth:`Series.value_counts` and :meth:`Index.value_counts` performing dtype inference on the ``.index`` of the result when called on object-dtype data (:issue:`56161`)
206207
- Enforced deprecation of :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` allowing the ``name`` argument to be a non-tuple when grouping by a list of length 1 (:issue:`54155`)
207208
- Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`)
208209
- Enforced deprecation of :meth:`offsets.Tick.delta`, use ``pd.Timedelta(obj)`` instead (:issue:`55498`)

pandas/core/algorithms.py

+5-30
Original file line numberDiff line numberDiff line change
@@ -892,26 +892,9 @@ def value_counts_internal(
892892
if keys.dtype == np.float16:
893893
keys = keys.astype(np.float32)
894894

895-
# For backwards compatibility, we let Index do its normal type
896-
# inference, _except_ for if it infers from object to bool.
897-
idx = Index(keys)
898-
if idx.dtype == bool and keys.dtype == object:
899-
idx = idx.astype(object)
900-
elif (
901-
idx.dtype != keys.dtype # noqa: PLR1714 # # pylint: disable=R1714
902-
and idx.dtype != "string[pyarrow_numpy]"
903-
):
904-
warnings.warn(
905-
# GH#56161
906-
"The behavior of value_counts with object-dtype is deprecated. "
907-
"In a future version, this will *not* perform dtype inference "
908-
"on the resulting index. To retain the old behavior, use "
909-
"`result.index = result.index.infer_objects()`",
910-
FutureWarning,
911-
stacklevel=find_stack_level(),
912-
)
913-
idx.name = index_name
914-
895+
# Starting in 3.0, we no longer perform dtype inference on the
896+
# Index object we construct here, xref GH#56161
897+
idx = Index(keys, dtype=keys.dtype, name=index_name)
915898
result = Series(counts, index=idx, name=name, copy=False)
916899

917900
if sort:
@@ -1606,16 +1589,8 @@ def union_with_duplicates(
16061589
"""
16071590
from pandas import Series
16081591

1609-
with warnings.catch_warnings():
1610-
# filter warning from object dtype inference; we will end up discarding
1611-
# the index here, so the deprecation does not affect the end result here.
1612-
warnings.filterwarnings(
1613-
"ignore",
1614-
"The behavior of value_counts with object-dtype is deprecated",
1615-
category=FutureWarning,
1616-
)
1617-
l_count = value_counts_internal(lvals, dropna=False)
1618-
r_count = value_counts_internal(rvals, dropna=False)
1592+
l_count = value_counts_internal(lvals, dropna=False)
1593+
r_count = value_counts_internal(rvals, dropna=False)
16191594
l_count, r_count = l_count.align(r_count, fill_value=0)
16201595
final_count = np.maximum(l_count.values, r_count.values)
16211596
final_count = Series(final_count, index=l_count.index, dtype="int", copy=False)

pandas/core/arrays/interval.py

+2-10
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
Union,
1414
overload,
1515
)
16-
import warnings
1716

1817
import numpy as np
1918

@@ -1217,15 +1216,8 @@ def value_counts(self, dropna: bool = True) -> Series:
12171216
Series.value_counts
12181217
"""
12191218
# TODO: implement this in a non-naive way!
1220-
with warnings.catch_warnings():
1221-
warnings.filterwarnings(
1222-
"ignore",
1223-
"The behavior of value_counts with object-dtype is deprecated",
1224-
category=FutureWarning,
1225-
)
1226-
result = value_counts(np.asarray(self), dropna=dropna)
1227-
# Once the deprecation is enforced, we will need to do
1228-
# `result.index = result.index.astype(self.dtype)`
1219+
result = value_counts(np.asarray(self), dropna=dropna)
1220+
result.index = result.index.astype(self.dtype)
12291221
return result
12301222

12311223
# ---------------------------------------------------------------------

pandas/tests/base/test_value_counts.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -347,9 +347,8 @@ def test_value_counts_object_inference_deprecated():
347347
dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
348348

349349
idx = dti.astype(object)
350-
msg = "The behavior of value_counts with object-dtype is deprecated"
351-
with tm.assert_produces_warning(FutureWarning, match=msg):
352-
res = idx.value_counts()
350+
res = idx.value_counts()
353351

354352
exp = dti.value_counts()
353+
exp.index = exp.index.astype(object)
355354
tm.assert_series_equal(res, exp)

0 commit comments

Comments
 (0)