Skip to content

Commit a2d9096

Browse files
author
MarcoGorelli
committed
wip
1 parent e41b6d7 commit a2d9096

File tree

10 files changed

+356
-135
lines changed

10 files changed

+356
-135
lines changed

doc/source/whatsnew/v1.5.2.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Bug fixes
3333

3434
Other
3535
~~~~~
36-
-
36+
- Introduced a ``FutureWarning`` notifying about an upcoming behaviour change in :meth:`DataFrame.value_counts`, :meth:`Series.value_counts`, :meth:`DataFrameGroupBy.value_counts`, :meth:`SeriesGroupBy.value_counts` - in pandas 2.0.0 the resulting Series will by default be named ``'count'`` (or ``'proportion'`` if ``normalize=True``), and the name of the index (if present) will be taken from the original object's name. Pass the ``name`` argument explicitly to silence the warning (:issue:`49497`)
3737
-
3838

3939
.. ---------------------------------------------------------------------------

pandas/core/algorithms.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,7 @@ def value_counts(
810810
normalize: bool = False,
811811
bins=None,
812812
dropna: bool = True,
813+
name: Hashable | None = None,
813814
) -> Series:
814815
"""
815816
Compute a histogram of the counts of non-null values.
@@ -838,7 +839,8 @@ def value_counts(
838839
Series,
839840
)
840841

841-
name = getattr(values, "name", None)
842+
if name is None:
843+
name = getattr(values, "name", None)
842844

843845
if bins is not None:
844846
from pandas.core.reshape.tile import cut
@@ -850,7 +852,7 @@ def value_counts(
850852
raise TypeError("bins argument only works with numeric data.") from err
851853

852854
# count, remove nulls (from the index), and but the bins
853-
result = ii.value_counts(dropna=dropna)
855+
result = ii.value_counts(dropna=dropna, name=name)
854856
result = result[result.index.notna()]
855857
result.index = result.index.astype("interval")
856858
result = result.sort_index()

pandas/core/base.py

+16
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
final,
1818
overload,
1919
)
20+
import warnings
2021

2122
import numpy as np
2223

@@ -37,6 +38,7 @@
3738
cache_readonly,
3839
doc,
3940
)
41+
from pandas.util._exceptions import find_stack_level
4042

4143
from pandas.core.dtypes.common import (
4244
is_categorical_dtype,
@@ -912,6 +914,8 @@ def value_counts(
912914
ascending: bool = False,
913915
bins=None,
914916
dropna: bool = True,
917+
*,
918+
name: lib.NoDefault = lib.no_default,
915919
) -> Series:
916920
"""
917921
Return a Series containing counts of unique values.
@@ -991,13 +995,25 @@ def value_counts(
991995
NaN 1
992996
dtype: int64
993997
"""
998+
if name is lib.no_default:
999+
result_name = "proportion" if normalize else "count"
1000+
warnings.warn(
1001+
"In pandas 2.0.0, the name of the resulting Series will be "
1002+
"'count' (or 'proportion' if `normalize=True`), and the index "
1003+
"will inherit the original object's name. Specify "
1004+
f"`name='{result_name}'` to silence this warning.",
1005+
FutureWarning,
1006+
stacklevel=find_stack_level(),
1007+
)
1008+
name = None
9941009
return value_counts(
9951010
self,
9961011
sort=sort,
9971012
ascending=ascending,
9981013
normalize=normalize,
9991014
bins=bins,
10001015
dropna=dropna,
1016+
name=name,
10011017
)
10021018

10031019
def unique(self):

pandas/core/frame.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -6936,6 +6936,8 @@ def value_counts(
69366936
sort: bool = True,
69376937
ascending: bool = False,
69386938
dropna: bool = True,
6939+
*,
6940+
name: NoDefault = no_default,
69396941
) -> Series:
69406942
"""
69416943
Return a Series containing counts of unique rows in the DataFrame.
@@ -7037,10 +7039,20 @@ def value_counts(
70377039
NaN 1
70387040
dtype: int64
70397041
"""
7042+
if name is no_default:
7043+
result_name = "proportion" if normalize else "count"
7044+
warnings.warn(
7045+
"In pandas 2.0.0, the name of the resulting Series will be "
7046+
"'count' (or 'proportion' if `normalize=True`). Specify "
7047+
f"`name='{result_name}'` to silence this warning.",
7048+
FutureWarning,
7049+
stacklevel=find_stack_level(),
7050+
)
7051+
name = None
70407052
if subset is None:
70417053
subset = self.columns.tolist()
70427054

7043-
counts = self.groupby(subset, dropna=dropna).grouper.size()
7055+
counts = self.groupby(subset, dropna=dropna).grouper.size().rename(name)
70447056

70457057
if sort:
70467058
counts = counts.sort_values(ascending=ascending)

pandas/core/groupby/generic.py

+39-7
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,19 @@ def value_counts(
604604
ascending: bool = False,
605605
bins=None,
606606
dropna: bool = True,
607+
*,
608+
name: lib.NoDefault = lib.no_default,
607609
) -> Series:
610+
if name is lib.no_default:
611+
result_name = "proportion" if normalize else "count"
612+
warnings.warn(
613+
"In pandas 2.0.0, the name of the resulting Series will be "
614+
"'count' (or 'proportion' if `normalize=True`). Specify "
615+
f"`name='{result_name}'` to silence this warning.",
616+
FutureWarning,
617+
stacklevel=find_stack_level(),
618+
)
619+
name = self.obj.name
608620

609621
from pandas.core.reshape.merge import get_join_indexers
610622
from pandas.core.reshape.tile import cut
@@ -626,6 +638,7 @@ def value_counts(
626638
sort=sort,
627639
ascending=ascending,
628640
bins=bins,
641+
name=name,
629642
)
630643
ser.index.names = names
631644
return ser
@@ -741,7 +754,7 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray:
741754

742755
if is_integer_dtype(out.dtype):
743756
out = ensure_int64(out)
744-
return self.obj._constructor(out, index=mi, name=self.obj.name)
757+
return self.obj._constructor(out, index=mi, name=name)
745758

746759
def fillna(
747760
self,
@@ -1875,6 +1888,8 @@ def value_counts(
18751888
sort: bool = True,
18761889
ascending: bool = False,
18771890
dropna: bool = True,
1891+
*,
1892+
name: lib.NoDefault = lib.no_default,
18781893
) -> DataFrame | Series:
18791894
"""
18801895
Return a Series or DataFrame containing counts of unique rows.
@@ -1979,6 +1994,19 @@ def value_counts(
19791994
3 male low US 0.25
19801995
4 male medium FR 0.25
19811996
"""
1997+
if name is lib.no_default and self.as_index:
1998+
result_name = "proportion" if normalize else "count"
1999+
warnings.warn(
2000+
"In pandas 2.0.0, the name of the resulting Series will be "
2001+
"'count' (or 'proportion' if `normalize=True`). Specify "
2002+
f"`name='{result_name}'` to silence this warning.",
2003+
FutureWarning,
2004+
stacklevel=find_stack_level(),
2005+
)
2006+
name = None
2007+
elif name is lib.no_default and not self.as_index:
2008+
name = None
2009+
19822010
if self.axis == 1:
19832011
raise NotImplementedError(
19842012
"DataFrameGroupBy.value_counts only handles axis=0"
@@ -1991,8 +2019,11 @@ def value_counts(
19912019
grouping.name for grouping in self.grouper.groupings if grouping.in_axis
19922020
}
19932021
if isinstance(self._selected_obj, Series):
1994-
name = self._selected_obj.name
1995-
keys = [] if name in in_axis_names else [self._selected_obj]
2022+
keys = (
2023+
[]
2024+
if self._selected_obj.name in in_axis_names
2025+
else [self._selected_obj]
2026+
)
19962027
else:
19972028
unique_cols = set(self._selected_obj.columns)
19982029
if subset is not None:
@@ -2015,8 +2046,8 @@ def value_counts(
20152046
keys = [
20162047
# Can't use .values because the column label needs to be preserved
20172048
self._selected_obj.iloc[:, idx]
2018-
for idx, name in enumerate(self._selected_obj.columns)
2019-
if name not in in_axis_names and name in subsetted
2049+
for idx, _name in enumerate(self._selected_obj.columns)
2050+
if _name not in in_axis_names and _name in subsetted
20202051
]
20212052

20222053
groupings = list(self.grouper.groupings)
@@ -2038,7 +2069,7 @@ def value_counts(
20382069
observed=self.observed,
20392070
dropna=self.dropna,
20402071
)
2041-
result_series = cast(Series, gb.size())
2072+
result_series = cast(Series, gb.size()).rename(name)
20422073

20432074
# GH-46357 Include non-observed categories
20442075
# of non-grouping columns regardless of `observed`
@@ -2082,7 +2113,8 @@ def value_counts(
20822113
result = result_series
20832114
else:
20842115
# Convert to frame
2085-
name = "proportion" if normalize else "count"
2116+
if name is None:
2117+
name = "proportion" if normalize else "count"
20862118
index = result_series.index
20872119
columns = com.fill_missing_names(index.names)
20882120
if name in columns:

0 commit comments

Comments
 (0)