Skip to content

Commit 9ac1562

Browse files
authored
DEPR: dtype inference in value_counts (#56161)
* DEPR: dtype inference in value_counts * GH ref
1 parent 3530b3d commit 9ac1562

File tree

4 files changed

+45
-3
lines changed

4 files changed

+45
-3
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,7 @@ Other Deprecations
396396
- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`)
397397
- Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`)
398398
- Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
399+
- Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`. To retain the old behavior, use ``result.index = result.index.infer_objects()`` (:issue:`56161`)
399400
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
400401
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
401402
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)

pandas/core/algorithms.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -932,6 +932,16 @@ def value_counts_internal(
932932
idx = Index(keys)
933933
if idx.dtype == bool and keys.dtype == object:
934934
idx = idx.astype(object)
935+
elif idx.dtype != keys.dtype:
936+
warnings.warn(
937+
# GH#56161
938+
"The behavior of value_counts with object-dtype is deprecated. "
939+
"In a future version, this will *not* perform dtype inference "
940+
"on the resulting index. To retain the old behavior, use "
941+
"`result.index = result.index.infer_objects()`",
942+
FutureWarning,
943+
stacklevel=find_stack_level(),
944+
)
935945
idx.name = index_name
936946

937947
result = Series(counts, index=idx, name=name, copy=False)
@@ -1712,8 +1722,16 @@ def union_with_duplicates(
17121722
"""
17131723
from pandas import Series
17141724

1715-
l_count = value_counts_internal(lvals, dropna=False)
1716-
r_count = value_counts_internal(rvals, dropna=False)
1725+
with warnings.catch_warnings():
1726+
# filter warning from object dtype inference; we will end up discarding
1727+
# the index here, so the deprecation does not affect the end result here.
1728+
warnings.filterwarnings(
1729+
"ignore",
1730+
"The behavior of value_counts with object-dtype is deprecated",
1731+
category=FutureWarning,
1732+
)
1733+
l_count = value_counts_internal(lvals, dropna=False)
1734+
r_count = value_counts_internal(rvals, dropna=False)
17171735
l_count, r_count = l_count.align(r_count, fill_value=0)
17181736
final_count = np.maximum(l_count.values, r_count.values)
17191737
final_count = Series(final_count, index=l_count.index, dtype="int", copy=False)

pandas/core/arrays/interval.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
Union,
1313
overload,
1414
)
15+
import warnings
1516

1617
import numpy as np
1718

@@ -1226,7 +1227,16 @@ def value_counts(self, dropna: bool = True) -> Series:
12261227
Series.value_counts
12271228
"""
12281229
# TODO: implement this in a non-naive way!
1229-
return value_counts(np.asarray(self), dropna=dropna)
1230+
with warnings.catch_warnings():
1231+
warnings.filterwarnings(
1232+
"ignore",
1233+
"The behavior of value_counts with object-dtype is deprecated",
1234+
category=FutureWarning,
1235+
)
1236+
result = value_counts(np.asarray(self), dropna=dropna)
1237+
# Once the deprecation is enforced, we will need to do
1238+
# `result.index = result.index.astype(self.dtype)`
1239+
return result
12301240

12311241
# ---------------------------------------------------------------------
12321242
# Rendering Methods

pandas/tests/base/test_value_counts.py

+13
Original file line numberDiff line numberDiff line change
@@ -336,3 +336,16 @@ def test_value_counts_with_nan(dropna, index_or_series):
336336
else:
337337
expected = Series([1, 1, 1], index=[True, pd.NA, np.nan], name="count")
338338
tm.assert_series_equal(res, expected)
339+
340+
341+
def test_value_counts_object_inference_deprecated():
342+
# GH#56161
343+
dti = pd.date_range("2016-01-01", periods=3, tz="UTC")
344+
345+
idx = dti.astype(object)
346+
msg = "The behavior of value_counts with object-dtype is deprecated"
347+
with tm.assert_produces_warning(FutureWarning, match=msg):
348+
res = idx.value_counts()
349+
350+
exp = dti.value_counts()
351+
tm.assert_series_equal(res, exp)

0 commit comments

Comments
 (0)