Skip to content

Commit 8b6a244

Browse files
authored
DEPR: pd.value_counts (#53493)
* DEPR: pd.value_counts
* update docs
* suppress warning
* suppress warning
* suppress warning
* suppress warning
* pyright ignore
* pylint ignore
* warn inside value_counts
1 parent d7af4db commit 8b6a244

File tree

9 files changed

+63
-23
lines changed

9 files changed

+63
-23
lines changed

doc/source/user_guide/basics.rst

+1-3
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,7 @@ matching index:
675675
Value counts (histogramming) / mode
676676
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
677677

678-
The :meth:`~Series.value_counts` Series method and top-level function computes a histogram
678+
The :meth:`~Series.value_counts` Series method computes a histogram
679679
of a 1D array of values. To count the values of a regular array, wrap it in a :class:`Series` first:
680680

681681
.. ipython:: python
@@ -684,7 +684,6 @@ of a 1D array of values. It can also be used as a function on regular arrays:
684684
data
685685
s = pd.Series(data)
686686
s.value_counts()
687-
pd.value_counts(data)
688687
689688
The :meth:`~DataFrame.value_counts` method can be used to count combinations across multiple columns.
690689
By default all columns are used but a subset can be selected using the ``subset`` argument.
@@ -733,7 +732,6 @@ normally distributed data into equal-size quartiles like so:
733732
arr = np.random.randn(30)
734733
factor = pd.qcut(arr, [0, 0.25, 0.5, 0.75, 1])
735734
factor
736-
pd.value_counts(factor)
737735
738736
We can also pass infinite values to define the bins:
739737

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ Deprecations
276276
- Deprecated unused "closed" and "normalize" keywords in the :class:`DatetimeIndex` constructor (:issue:`52628`)
277277
- Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`)
278278
- Deprecated logical operation between two non boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`)
279+
- Deprecated :func:`value_counts`; use ``pd.Series(obj).value_counts()`` instead (:issue:`47862`)
279280
- Deprecated :meth:`Series.first` and :meth:`DataFrame.first` (please create a mask and filter using ``.loc`` instead) (:issue:`45908`)
280281
- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
281282
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)

pandas/core/algorithms.py

+27-2
Original file line numberDiff line numberDiff line change
@@ -838,6 +838,31 @@ def value_counts(
838838
-------
839839
Series
840840
"""
841+
warnings.warn(
842+
# GH#53493
843+
"pandas.value_counts is deprecated and will be removed in a "
844+
"future version. Use pd.Series(obj).value_counts() instead.",
845+
FutureWarning,
846+
stacklevel=find_stack_level(),
847+
)
848+
return value_counts_internal(
849+
values,
850+
sort=sort,
851+
ascending=ascending,
852+
normalize=normalize,
853+
bins=bins,
854+
dropna=dropna,
855+
)
856+
857+
858+
def value_counts_internal(
859+
values,
860+
sort: bool = True,
861+
ascending: bool = False,
862+
normalize: bool = False,
863+
bins=None,
864+
dropna: bool = True,
865+
) -> Series:
841866
from pandas import (
842867
Index,
843868
Series,
@@ -1678,8 +1703,8 @@ def union_with_duplicates(
16781703
"""
16791704
from pandas import Series
16801705

1681-
l_count = value_counts(lvals, dropna=False)
1682-
r_count = value_counts(rvals, dropna=False)
1706+
l_count = value_counts_internal(lvals, dropna=False)
1707+
r_count = value_counts_internal(rvals, dropna=False)
16831708
l_count, r_count = l_count.align(r_count, fill_value=0)
16841709
final_count = np.maximum(l_count.values, r_count.values)
16851710
final_count = Series(final_count, index=l_count.index, dtype="int", copy=False)

pandas/core/arrays/_mixins.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
from pandas.core.algorithms import (
4949
take,
5050
unique,
51-
value_counts,
51+
value_counts_internal as value_counts,
5252
)
5353
from pandas.core.array_algos.quantile import quantile_with_mask
5454
from pandas.core.array_algos.transforms import shift

pandas/core/arrays/interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@
7979
isin,
8080
take,
8181
unique,
82-
value_counts,
82+
value_counts_internal as value_counts,
8383
)
8484
from pandas.core.arrays.base import (
8585
ExtensionArray,

pandas/core/arrays/string_.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
496496
return self._wrap_reduction_result(axis, result)
497497

498498
def value_counts(self, dropna: bool = True) -> Series:
499-
from pandas import value_counts
499+
from pandas.core.algorithms import value_counts_internal as value_counts
500500

501501
result = value_counts(self._ndarray, dropna=dropna).astype("Int64")
502502
result.index = result.index.astype(self.dtype)

pandas/core/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -958,7 +958,7 @@ def value_counts(
958958
NaN 1
959959
Name: count, dtype: int64
960960
"""
961-
return algorithms.value_counts(
961+
return algorithms.value_counts_internal(
962962
self,
963963
sort=sort,
964964
ascending=ascending,

pandas/tests/extension/decimal/array.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
is_scalar,
2626
)
2727
from pandas.core import arraylike
28+
from pandas.core.algorithms import value_counts_internal as value_counts
2829
from pandas.core.arraylike import OpsMixin
2930
from pandas.core.arrays import (
3031
ExtensionArray,
@@ -273,8 +274,6 @@ def convert_values(param):
273274
return np.asarray(res, dtype=bool)
274275

275276
def value_counts(self, dropna: bool = True):
276-
from pandas.core.algorithms import value_counts
277-
278277
return value_counts(self.to_numpy(), dropna=dropna)
279278

280279

pandas/tests/test_algos.py

+29-12
Original file line numberDiff line numberDiff line change
@@ -1181,21 +1181,26 @@ def test_value_counts(self):
11811181
factor = cut(arr, 4)
11821182

11831183
# assert isinstance(factor, n)
1184-
result = algos.value_counts(factor)
1184+
msg = "pandas.value_counts is deprecated"
1185+
with tm.assert_produces_warning(FutureWarning, match=msg):
1186+
result = algos.value_counts(factor)
11851187
breaks = [-1.194, -0.535, 0.121, 0.777, 1.433]
11861188
index = IntervalIndex.from_breaks(breaks).astype(CDT(ordered=True))
11871189
expected = Series([1, 1, 1, 1], index=index, name="count")
11881190
tm.assert_series_equal(result.sort_index(), expected.sort_index())
11891191

11901192
def test_value_counts_bins(self):
11911193
s = [1, 2, 3, 4]
1192-
result = algos.value_counts(s, bins=1)
1194+
msg = "pandas.value_counts is deprecated"
1195+
with tm.assert_produces_warning(FutureWarning, match=msg):
1196+
result = algos.value_counts(s, bins=1)
11931197
expected = Series(
11941198
[4], index=IntervalIndex.from_tuples([(0.996, 4.0)]), name="count"
11951199
)
11961200
tm.assert_series_equal(result, expected)
11971201

1198-
result = algos.value_counts(s, bins=2, sort=False)
1202+
with tm.assert_produces_warning(FutureWarning, match=msg):
1203+
result = algos.value_counts(s, bins=2, sort=False)
11991204
expected = Series(
12001205
[2, 2],
12011206
index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)]),
@@ -1204,31 +1209,40 @@ def test_value_counts_bins(self):
12041209
tm.assert_series_equal(result, expected)
12051210

12061211
def test_value_counts_dtypes(self):
1207-
result = algos.value_counts(np.array([1, 1.0]))
1212+
msg2 = "pandas.value_counts is deprecated"
1213+
with tm.assert_produces_warning(FutureWarning, match=msg2):
1214+
result = algos.value_counts(np.array([1, 1.0]))
12081215
assert len(result) == 1
12091216

1210-
result = algos.value_counts(np.array([1, 1.0]), bins=1)
1217+
with tm.assert_produces_warning(FutureWarning, match=msg2):
1218+
result = algos.value_counts(np.array([1, 1.0]), bins=1)
12111219
assert len(result) == 1
12121220

1213-
result = algos.value_counts(Series([1, 1.0, "1"])) # object
1221+
with tm.assert_produces_warning(FutureWarning, match=msg2):
1222+
result = algos.value_counts(Series([1, 1.0, "1"])) # object
12141223
assert len(result) == 2
12151224

12161225
msg = "bins argument only works with numeric data"
12171226
with pytest.raises(TypeError, match=msg):
1218-
algos.value_counts(np.array(["1", 1], dtype=object), bins=1)
1227+
with tm.assert_produces_warning(FutureWarning, match=msg2):
1228+
algos.value_counts(np.array(["1", 1], dtype=object), bins=1)
12191229

12201230
def test_value_counts_nat(self):
12211231
td = Series([np.timedelta64(10000), NaT], dtype="timedelta64[ns]")
12221232
dt = to_datetime(["NaT", "2014-01-01"])
12231233

1234+
msg = "pandas.value_counts is deprecated"
1235+
12241236
for s in [td, dt]:
1225-
vc = algos.value_counts(s)
1226-
vc_with_na = algos.value_counts(s, dropna=False)
1237+
with tm.assert_produces_warning(FutureWarning, match=msg):
1238+
vc = algos.value_counts(s)
1239+
vc_with_na = algos.value_counts(s, dropna=False)
12271240
assert len(vc) == 1
12281241
assert len(vc_with_na) == 2
12291242

12301243
exp_dt = Series({Timestamp("2014-01-01 00:00:00"): 1}, name="count")
1231-
tm.assert_series_equal(algos.value_counts(dt), exp_dt)
1244+
with tm.assert_produces_warning(FutureWarning, match=msg):
1245+
tm.assert_series_equal(algos.value_counts(dt), exp_dt)
12321246
# TODO same for (timedelta)
12331247

12341248
def test_value_counts_datetime_outofbounds(self):
@@ -1388,13 +1402,16 @@ def test_value_counts_normalized(self, dtype):
13881402
def test_value_counts_uint64(self):
13891403
arr = np.array([2**63], dtype=np.uint64)
13901404
expected = Series([1], index=[2**63], name="count")
1391-
result = algos.value_counts(arr)
1405+
msg = "pandas.value_counts is deprecated"
1406+
with tm.assert_produces_warning(FutureWarning, match=msg):
1407+
result = algos.value_counts(arr)
13921408

13931409
tm.assert_series_equal(result, expected)
13941410

13951411
arr = np.array([-1, 2**63], dtype=object)
13961412
expected = Series([1, 1], index=[-1, 2**63], name="count")
1397-
result = algos.value_counts(arr)
1413+
with tm.assert_produces_warning(FutureWarning, match=msg):
1414+
result = algos.value_counts(arr)
13981415

13991416
tm.assert_series_equal(result, expected)
14001417

0 commit comments

Comments
 (0)