Skip to content

DEPR: pd.value_counts #53493

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 12, 2023
4 changes: 1 addition & 3 deletions doc/source/user_guide/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ matching index:
Value counts (histogramming) / mode
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The :meth:`~Series.value_counts` Series method and top-level function computes a histogram
The :meth:`~Series.value_counts` Series method computes a histogram
of a 1D array of values. To count the values in a regular array, wrap it in a :class:`Series` first:

.. ipython:: python
Expand All @@ -684,7 +684,6 @@ of a 1D array of values. It can also be used as a function on regular arrays:
data
s = pd.Series(data)
s.value_counts()
pd.value_counts(data)
The :meth:`~DataFrame.value_counts` method can be used to count combinations across multiple columns.
By default all columns are used but a subset can be selected using the ``subset`` argument.
Expand Down Expand Up @@ -733,7 +732,6 @@ normally distributed data into equal-size quartiles like so:
arr = np.random.randn(30)
factor = pd.qcut(arr, [0, 0.25, 0.5, 0.75, 1])
factor
pd.value_counts(factor)
We can also pass infinite values to define the bins:

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ Deprecations
- Deprecated unused "closed" and "normalize" keywords in the :class:`DatetimeIndex` constructor (:issue:`52628`)
- Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`)
- Deprecated logical operation between two non boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`)
- Deprecated :func:`value_counts`, use ``pd.Series(obj).value_counts()`` instead (:issue:`47862`)
- Deprecated :meth:`Series.first` and :meth:`DataFrame.first` (please create a mask and filter using ``.loc`` instead) (:issue:`45908`)
- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
Expand Down
29 changes: 27 additions & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,31 @@ def value_counts(
-------
Series
"""
warnings.warn(
# GH#53493
"pandas.value_counts is deprecated and will be removed in a "
"future version. Use pd.Series(obj).value_counts() instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
return value_counts_internal(
values,
sort=sort,
ascending=ascending,
normalize=normalize,
bins=bins,
dropna=dropna,
)


def value_counts_internal(
values,
sort: bool = True,
ascending: bool = False,
normalize: bool = False,
bins=None,
dropna: bool = True,
) -> Series:
from pandas import (
Index,
Series,
Expand Down Expand Up @@ -1678,8 +1703,8 @@ def union_with_duplicates(
"""
from pandas import Series

l_count = value_counts(lvals, dropna=False)
r_count = value_counts(rvals, dropna=False)
l_count = value_counts_internal(lvals, dropna=False)
r_count = value_counts_internal(rvals, dropna=False)
l_count, r_count = l_count.align(r_count, fill_value=0)
final_count = np.maximum(l_count.values, r_count.values)
final_count = Series(final_count, index=l_count.index, dtype="int", copy=False)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
from pandas.core.algorithms import (
take,
unique,
value_counts,
value_counts_internal as value_counts,
)
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.array_algos.transforms import shift
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
isin,
take,
unique,
value_counts,
value_counts_internal as value_counts,
)
from pandas.core.arrays.base import (
ExtensionArray,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
return self._wrap_reduction_result(axis, result)

def value_counts(self, dropna: bool = True) -> Series:
from pandas import value_counts
from pandas.core.algorithms import value_counts_internal as value_counts

result = value_counts(self._ndarray, dropna=dropna).astype("Int64")
result.index = result.index.astype(self.dtype)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -958,7 +958,7 @@ def value_counts(
NaN 1
Name: count, dtype: int64
"""
return algorithms.value_counts(
return algorithms.value_counts_internal(
self,
sort=sort,
ascending=ascending,
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/extension/decimal/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
is_scalar,
)
from pandas.core import arraylike
from pandas.core.algorithms import value_counts_internal as value_counts
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import (
ExtensionArray,
Expand Down Expand Up @@ -273,8 +274,6 @@ def convert_values(param):
return np.asarray(res, dtype=bool)

def value_counts(self, dropna: bool = True):
from pandas.core.algorithms import value_counts

return value_counts(self.to_numpy(), dropna=dropna)


Expand Down
41 changes: 29 additions & 12 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,21 +1181,26 @@ def test_value_counts(self):
factor = cut(arr, 4)

# assert isinstance(factor, n)
result = algos.value_counts(factor)
msg = "pandas.value_counts is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.value_counts(factor)
breaks = [-1.194, -0.535, 0.121, 0.777, 1.433]
index = IntervalIndex.from_breaks(breaks).astype(CDT(ordered=True))
expected = Series([1, 1, 1, 1], index=index, name="count")
tm.assert_series_equal(result.sort_index(), expected.sort_index())

def test_value_counts_bins(self):
s = [1, 2, 3, 4]
result = algos.value_counts(s, bins=1)
msg = "pandas.value_counts is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.value_counts(s, bins=1)
expected = Series(
[4], index=IntervalIndex.from_tuples([(0.996, 4.0)]), name="count"
)
tm.assert_series_equal(result, expected)

result = algos.value_counts(s, bins=2, sort=False)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.value_counts(s, bins=2, sort=False)
expected = Series(
[2, 2],
index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)]),
Expand All @@ -1204,31 +1209,40 @@ def test_value_counts_bins(self):
tm.assert_series_equal(result, expected)

def test_value_counts_dtypes(self):
result = algos.value_counts(np.array([1, 1.0]))
msg2 = "pandas.value_counts is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg2):
result = algos.value_counts(np.array([1, 1.0]))
assert len(result) == 1

result = algos.value_counts(np.array([1, 1.0]), bins=1)
with tm.assert_produces_warning(FutureWarning, match=msg2):
result = algos.value_counts(np.array([1, 1.0]), bins=1)
assert len(result) == 1

result = algos.value_counts(Series([1, 1.0, "1"])) # object
with tm.assert_produces_warning(FutureWarning, match=msg2):
result = algos.value_counts(Series([1, 1.0, "1"])) # object
assert len(result) == 2

msg = "bins argument only works with numeric data"
with pytest.raises(TypeError, match=msg):
algos.value_counts(np.array(["1", 1], dtype=object), bins=1)
with tm.assert_produces_warning(FutureWarning, match=msg2):
algos.value_counts(np.array(["1", 1], dtype=object), bins=1)

def test_value_counts_nat(self):
td = Series([np.timedelta64(10000), NaT], dtype="timedelta64[ns]")
dt = to_datetime(["NaT", "2014-01-01"])

msg = "pandas.value_counts is deprecated"

for s in [td, dt]:
vc = algos.value_counts(s)
vc_with_na = algos.value_counts(s, dropna=False)
with tm.assert_produces_warning(FutureWarning, match=msg):
vc = algos.value_counts(s)
vc_with_na = algos.value_counts(s, dropna=False)
assert len(vc) == 1
assert len(vc_with_na) == 2

exp_dt = Series({Timestamp("2014-01-01 00:00:00"): 1}, name="count")
tm.assert_series_equal(algos.value_counts(dt), exp_dt)
with tm.assert_produces_warning(FutureWarning, match=msg):
tm.assert_series_equal(algos.value_counts(dt), exp_dt)
# TODO same for (timedelta)

def test_value_counts_datetime_outofbounds(self):
Expand Down Expand Up @@ -1388,13 +1402,16 @@ def test_value_counts_normalized(self, dtype):
def test_value_counts_uint64(self):
arr = np.array([2**63], dtype=np.uint64)
expected = Series([1], index=[2**63], name="count")
result = algos.value_counts(arr)
msg = "pandas.value_counts is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.value_counts(arr)

tm.assert_series_equal(result, expected)

arr = np.array([-1, 2**63], dtype=object)
expected = Series([1, 1], index=[-1, 2**63], name="count")
result = algos.value_counts(arr)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = algos.value_counts(arr)

tm.assert_series_equal(result, expected)

Expand Down