-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
WARN introduce FutureWarning for value_counts behaviour change #49640
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
25bef51
784cdbf
7d4be12
1c0d22b
098596a
4160b9b
29e3386
c02628e
b672854
4b3d59d
66e530f
401d595
3c67b7d
4c76219
1a11d65
8cafda4
b3f3984
210e624
c9acbf2
0c7f2d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -604,7 +604,19 @@ def value_counts( | |
ascending: bool = False, | ||
bins=None, | ||
dropna: bool = True, | ||
*, | ||
name: Hashable | None = None, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is adding `name` to the API, which would presumably be deprecated in 2.0. So users would be adding `name` to silence the warning, then removing it to silence the new deprecation warning. My understanding of the proposal was to just add a warning - no arguments - that the user will see on every use. While that is very noisy (maybe we could do a DeprecationWarning instead?), I think it's better than having users change code only to change it back. cc @jreback There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks @rhshadrach for taking a look - yeah, if it's OK to emit a warning each time then that'd be better; I'm also not keen on adding something to the API and then immediately deprecating it. I'll do that. |
||
) -> Series: | ||
if name is None: | ||
result_name = "proportion" if normalize else "count" | ||
warnings.warn( | ||
"In pandas 2.0.0, the name of the resulting Series will be " | ||
"'count' (or 'proportion' if `normalize=True`). Specify " | ||
f"`name='{result_name}'` to silence this warning.", | ||
FutureWarning, | ||
stacklevel=find_stack_level(), | ||
) | ||
name = self.obj.name | ||
|
||
from pandas.core.reshape.merge import get_join_indexers | ||
from pandas.core.reshape.tile import cut | ||
|
@@ -626,6 +638,7 @@ def value_counts( | |
sort=sort, | ||
ascending=ascending, | ||
bins=bins, | ||
name=name, | ||
) | ||
ser.index.names = names | ||
return ser | ||
|
@@ -741,7 +754,7 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray: | |
|
||
if is_integer_dtype(out.dtype): | ||
out = ensure_int64(out) | ||
return self.obj._constructor(out, index=mi, name=self.obj.name) | ||
return self.obj._constructor(out, index=mi, name=name) | ||
|
||
def fillna( | ||
self, | ||
|
@@ -1875,6 +1888,8 @@ def value_counts( | |
sort: bool = True, | ||
ascending: bool = False, | ||
dropna: bool = True, | ||
*, | ||
name: Hashable | None = None, | ||
) -> DataFrame | Series: | ||
""" | ||
Return a Series or DataFrame containing counts of unique rows. | ||
|
@@ -1979,6 +1994,16 @@ def value_counts( | |
3 male low US 0.25 | ||
4 male medium FR 0.25 | ||
""" | ||
if name is None and self.as_index: | ||
result_name = "proportion" if normalize else "count" | ||
warnings.warn( | ||
"In pandas 2.0.0, the name of the resulting Series will be " | ||
"'count' (or 'proportion' if `normalize=True`). Specify " | ||
f"`name='{result_name}'` to silence this warning.", | ||
FutureWarning, | ||
stacklevel=find_stack_level(), | ||
) | ||
|
||
if self.axis == 1: | ||
raise NotImplementedError( | ||
"DataFrameGroupBy.value_counts only handles axis=0" | ||
|
@@ -1991,8 +2016,11 @@ def value_counts( | |
grouping.name for grouping in self.grouper.groupings if grouping.in_axis | ||
} | ||
if isinstance(self._selected_obj, Series): | ||
name = self._selected_obj.name | ||
keys = [] if name in in_axis_names else [self._selected_obj] | ||
keys = ( | ||
[] | ||
if self._selected_obj.name in in_axis_names | ||
else [self._selected_obj] | ||
) | ||
else: | ||
unique_cols = set(self._selected_obj.columns) | ||
if subset is not None: | ||
|
@@ -2015,8 +2043,8 @@ def value_counts( | |
keys = [ | ||
# Can't use .values because the column label needs to be preserved | ||
self._selected_obj.iloc[:, idx] | ||
for idx, name in enumerate(self._selected_obj.columns) | ||
if name not in in_axis_names and name in subsetted | ||
for idx, _name in enumerate(self._selected_obj.columns) | ||
if _name not in in_axis_names and _name in subsetted | ||
] | ||
|
||
groupings = list(self.grouper.groupings) | ||
|
@@ -2038,7 +2066,7 @@ def value_counts( | |
observed=self.observed, | ||
dropna=self.dropna, | ||
) | ||
result_series = cast(Series, gb.size()) | ||
result_series = cast(Series, gb.size()).rename(name) | ||
|
||
# GH-46357 Include non-observed categories | ||
# of non-grouping columns regardless of `observed` | ||
|
@@ -2082,7 +2110,8 @@ def value_counts( | |
result = result_series | ||
else: | ||
# Convert to frame | ||
name = "proportion" if normalize else "count" | ||
if name is None: | ||
name = "proportion" if normalize else "count" | ||
index = result_series.index | ||
columns = com.fill_missing_names(index.names) | ||
if name in columns: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This warrants a full-on section, with a before/after example showing how to fix it.