diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index 9f5827eabee52..12de9b121ef6d 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -84,8 +84,8 @@ class ToCSVIndexes(BaseIO): def _create_df(rows, cols): index_cols = { "index1": np.random.randint(0, rows, rows), - "index2": np.full(rows, 1, dtype=np.int), - "index3": np.full(rows, 1, dtype=np.int), + "index2": np.full(rows, 1, dtype=int), + "index3": np.full(rows, 1, dtype=int), } data_cols = { f"col{i}": np.random.uniform(0, 100000.0, rows) for i in range(cols) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 06b46c50e9467..eb7c9e69d962b 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -373,13 +373,13 @@ def delete(self: NDArrayBackedExtensionArrayT, loc) -> NDArrayBackedExtensionArr # These are not part of the EA API, but we implement them because # pandas assumes they're there. - def value_counts(self, dropna: bool = False): + def value_counts(self, dropna: bool = True): """ Return a Series containing counts of unique values. Parameters ---------- - dropna : bool, default False + dropna : bool, default True Don't include counts of NA values. Returns diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 48316373a1140..af78b84923a9c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1416,7 +1416,7 @@ def notna(self): notnull = notna - def value_counts(self, dropna=True): + def value_counts(self, dropna: bool = True): """ Return a Series containing counts of each category. diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 882ca0955bc99..dd1b396ee761f 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1021,7 +1021,7 @@ def _validate_setitem_value(self, value): raise ValueError("Cannot set float NaN to integer-backed IntervalArray") return value_left, value_right - def value_counts(self, dropna=True): + def value_counts(self, dropna: bool = True): """ Returns a Series containing counts of each interval. diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index f87f40cd55e2c..4f68ed3d9a79d 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -725,13 +725,13 @@ def factorize(self, na_sentinel=-1): uniques = SparseArray(uniques, dtype=self.dtype) return codes, uniques - def value_counts(self, dropna=True): + def value_counts(self, dropna: bool = True): """ Returns a Series containing counts of unique values. Parameters ---------- - dropna : boolean, default True + dropna : bool, default True Don't include counts of NaN, even if NaN is in sp_values. Returns diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 2e4580207bc8a..65618ce32b6d7 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -338,7 +338,7 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: ) return self._wrap_reduction_result(axis, result) - def value_counts(self, dropna=False): + def value_counts(self, dropna: bool = True): from pandas import value_counts return value_counts(self._ndarray, dropna=dropna).astype("Int64") diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a98ef15696339..c3e2321b22b05 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -679,7 +679,12 @@ def describe(self, **kwargs): return result.unstack() def value_counts( - self, normalize=False, sort=True, ascending=False, bins=None, dropna=True + self, + normalize=False, + sort=True, + ascending=False, + bins=None, + dropna: bool = True, ): from pandas.core.reshape.merge import get_join_indexers diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 86c4b4c5ce63d..d159d76030250 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -172,7 +172,7 @@ def test_value_counts_preserves_tz(self): assert result.index.equals(dti) arr[-2] = pd.NaT - result = arr.value_counts() + result = arr.value_counts(dropna=False) expected = pd.Series([4, 2, 1], index=[dti[0], dti[1], pd.NaT]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 5906221389b35..67348cfe57c2d 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -1,3 +1,4 @@ +import inspect import operator import numpy as np @@ -15,6 +16,14 @@ class BaseMethodsTests(BaseExtensionTests): """Various Series and DataFrame methods.""" + def test_value_counts_default_dropna(self, data): + # make sure we have consistent default dropna kwarg + if not hasattr(data, "value_counts"): + pytest.skip("value_counts is not implemented") + sig = inspect.signature(data.value_counts) + kwarg = sig.parameters["dropna"] + assert kwarg.default is True + @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna): all_data = all_data[:10] diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 4122fcaae496b..c7976c5800173 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -230,7 +230,7 @@ def convert_values(param): return np.asarray(res, dtype=bool) - def value_counts(self, dropna: bool = False): + def value_counts(self, dropna: bool = True): from pandas.core.algorithms import value_counts return value_counts(self.to_numpy(), dropna=dropna)