Skip to content

Commit 1ffedc2

Browse files
authored
API: consistent default dropna for value_counts (#39511)
1 parent 388ff01 commit 1ffedc2

File tree

10 files changed: 26 additions and 12 deletions.

asv_bench/benchmarks/io/csv.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ class ToCSVIndexes(BaseIO):
8484
def _create_df(rows, cols):
8585
index_cols = {
8686
"index1": np.random.randint(0, rows, rows),
87-
"index2": np.full(rows, 1, dtype=np.int),
88-
"index3": np.full(rows, 1, dtype=np.int),
87+
"index2": np.full(rows, 1, dtype=int),
88+
"index3": np.full(rows, 1, dtype=int),
8989
}
9090
data_cols = {
9191
f"col{i}": np.random.uniform(0, 100000.0, rows) for i in range(cols)

pandas/core/arrays/_mixins.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -373,13 +373,13 @@ def delete(self: NDArrayBackedExtensionArrayT, loc) -> NDArrayBackedExtensionArr
373373
# These are not part of the EA API, but we implement them because
374374
# pandas assumes they're there.
375375

376-
def value_counts(self, dropna: bool = False):
376+
def value_counts(self, dropna: bool = True):
377377
"""
378378
Return a Series containing counts of unique values.
379379
380380
Parameters
381381
----------
382-
dropna : bool, default False
382+
dropna : bool, default True
383383
Don't include counts of NA values.
384384
385385
Returns

pandas/core/arrays/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1416,7 +1416,7 @@ def notna(self):
14161416

14171417
notnull = notna
14181418

1419-
def value_counts(self, dropna=True):
1419+
def value_counts(self, dropna: bool = True):
14201420
"""
14211421
Return a Series containing counts of each category.
14221422

pandas/core/arrays/interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1021,7 +1021,7 @@ def _validate_setitem_value(self, value):
10211021
raise ValueError("Cannot set float NaN to integer-backed IntervalArray")
10221022
return value_left, value_right
10231023

1024-
def value_counts(self, dropna=True):
1024+
def value_counts(self, dropna: bool = True):
10251025
"""
10261026
Returns a Series containing counts of each interval.
10271027

pandas/core/arrays/sparse/array.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -725,13 +725,13 @@ def factorize(self, na_sentinel=-1):
725725
uniques = SparseArray(uniques, dtype=self.dtype)
726726
return codes, uniques
727727

728-
def value_counts(self, dropna=True):
728+
def value_counts(self, dropna: bool = True):
729729
"""
730730
Returns a Series containing counts of unique values.
731731
732732
Parameters
733733
----------
734-
dropna : boolean, default True
734+
dropna : bool, default True
735735
Don't include counts of NaN, even if NaN is in sp_values.
736736
737737
Returns

pandas/core/arrays/string_.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
338338
)
339339
return self._wrap_reduction_result(axis, result)
340340

341-
def value_counts(self, dropna=False):
341+
def value_counts(self, dropna: bool = True):
342342
from pandas import value_counts
343343

344344
return value_counts(self._ndarray, dropna=dropna).astype("Int64")

pandas/core/groupby/generic.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,12 @@ def describe(self, **kwargs):
678678
return result.unstack()
679679

680680
def value_counts(
681-
self, normalize=False, sort=True, ascending=False, bins=None, dropna=True
681+
self,
682+
normalize=False,
683+
sort=True,
684+
ascending=False,
685+
bins=None,
686+
dropna: bool = True,
682687
):
683688

684689
from pandas.core.reshape.merge import get_join_indexers

pandas/tests/arrays/test_datetimes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def test_value_counts_preserves_tz(self):
172172
assert result.index.equals(dti)
173173

174174
arr[-2] = pd.NaT
175-
result = arr.value_counts()
175+
result = arr.value_counts(dropna=False)
176176
expected = pd.Series([4, 2, 1], index=[dti[0], dti[1], pd.NaT])
177177
tm.assert_series_equal(result, expected)
178178

pandas/tests/extension/base/methods.py

+9
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import inspect
12
import operator
23

34
import numpy as np
@@ -15,6 +16,14 @@
1516
class BaseMethodsTests(BaseExtensionTests):
1617
"""Various Series and DataFrame methods."""
1718

19+
def test_value_counts_default_dropna(self, data):
20+
# make sure we have consistent default dropna kwarg
21+
if not hasattr(data, "value_counts"):
22+
pytest.skip("value_counts is not implemented")
23+
sig = inspect.signature(data.value_counts)
24+
kwarg = sig.parameters["dropna"]
25+
assert kwarg.default is True
26+
1827
@pytest.mark.parametrize("dropna", [True, False])
1928
def test_value_counts(self, all_data, dropna):
2029
all_data = all_data[:10]

pandas/tests/extension/decimal/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def convert_values(param):
230230

231231
return np.asarray(res, dtype=bool)
232232

233-
def value_counts(self, dropna: bool = False):
233+
def value_counts(self, dropna: bool = True):
234234
from pandas.core.algorithms import value_counts
235235

236236
return value_counts(self.to_numpy(), dropna=dropna)

0 commit comments

Comments
 (0)