diff --git a/doc/source/getting_started/comparison/includes/filtering.rst b/doc/source/getting_started/comparison/includes/filtering.rst index 8ddf7c0d2fa39..33f207e8bec30 100644 --- a/doc/source/getting_started/comparison/includes/filtering.rst +++ b/doc/source/getting_started/comparison/includes/filtering.rst @@ -9,6 +9,7 @@ The above statement is simply passing a ``Series`` of ``True``/``False`` objects returning all rows with ``True``. .. ipython:: python + :okwarning: is_dinner = tips["time"] == "Dinner" is_dinner diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst index 032a6add522e0..7e800851b744d 100644 --- a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst +++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst @@ -224,6 +224,7 @@ Count number of records by category What is the number of passengers in each of the cabin classes? .. ipython:: python + :okwarning: titanic["Pclass"].value_counts() diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index 6fc53fe09d791..77b3364a78842 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -430,6 +430,7 @@ Histogramming See more at :ref:`Histogramming and Discretization <basics.discretization>`. .. ipython:: python + :okwarning: s = pd.Series(np.random.randint(0, 7, size=10)) s diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index 2204c8b04e438..f2ee5b0924f44 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -689,6 +689,7 @@ The :meth:`~Series.value_counts` Series method and top-level function computes a of a 1D array of values. It can also be used as a function on regular arrays: .. ipython:: python + :okwarning: data = np.random.randint(0, 7, size=50) data @@ -702,6 +703,7 @@ The :meth:`~DataFrame.value_counts` method can be used to count combinations acr By default all columns are used but a subset can be selected using the ``subset`` argument. .. ipython:: python + :okwarning: data = {"a": [1, 2, 3, 4], "b": ["x", "x", "y", "y"]} frame = pd.DataFrame(data) @@ -741,6 +743,7 @@ and :func:`qcut` (bins based on sample quantiles) functions: normally distributed data into equal-size quartiles like so: .. ipython:: python + :okwarning: arr = np.random.randn(30) factor = pd.qcut(arr, [0, 0.25, 0.5, 0.75, 1]) @@ -2102,6 +2105,7 @@ The number of columns of each type in a ``DataFrame`` can be found by calling ``DataFrame.dtypes.value_counts()``. .. ipython:: python + :okwarning: dft.dtypes.value_counts() diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index f3d68f4c471c1..1e71939336b5d 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -611,6 +611,7 @@ following operations are possible with categorical data: even if some categories are not present in the data: .. ipython:: python + :okwarning: s = pd.Series(pd.Categorical(["a", "b", "c", "c"], categories=["c", "a", "b", "d"])) s.value_counts() diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index daf5a0e481b8e..44f8beb02f822 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -694,6 +694,7 @@ The :ref:`Pivot <reshaping.pivot>` docs. `__ ..
ipython:: python + :okwarning: grades = [48, 99, 75, 80, 42, 80, 72, 68, 36, 78] df = pd.DataFrame( diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index b4bf3ef024d4c..26f96014d7b7b 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -418,6 +418,7 @@ For instance, you can use the ``converters`` argument of :func:`~pandas.read_csv`: .. ipython:: python + :okwarning: data = "col_1\n1\n2\n'A'\n4.22" df = pd.read_csv(StringIO(data), converters={"col_1": str}) @@ -428,6 +429,7 @@ Or you can use the :func:`~pandas.to_numeric` function to coerce the dtypes after reading in the data, .. ipython:: python + :okwarning: df2 = pd.read_csv(StringIO(data)) df2["col_1"] = pd.to_numeric(df2["col_1"], errors="coerce") @@ -4329,6 +4331,7 @@ nan representation on disk (which converts to/from ``np.nan``), this defaults to ``nan``. .. ipython:: python + :okwarning: df_mixed = pd.DataFrame( { diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index aefb5f0d3d2df..a0838dd90cd77 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -102,6 +102,7 @@ sentinel value that can be represented by NumPy in a singular dtype (datetime64[ pandas objects provide compatibility between ``NaT`` and ``NaN``. .. ipython:: python + :okwarning: df2 = df.copy() df2["timestamp"] = pd.Timestamp("20120101") diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index 129f43dd36930..d40eca81b62ba 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -220,6 +220,7 @@ counts up to this point. As long as each individual file fits in memory, this wi work for arbitrary-sized datasets. .. ipython:: python + :okwarning: %%time files = pathlib.Path("data/timeseries/").glob("ts*.parquet") @@ -302,6 +303,7 @@ returns a Dask Series with the same dtype and the same name. To get the actual result you can call ``.compute()``. .. ipython:: python + :okwarning: %time ddf["name"].value_counts().compute() diff --git a/doc/source/whatsnew/v0.10.1.rst b/doc/source/whatsnew/v0.10.1.rst index 611ac2021fcec..ef54416b3a9d4 100644 --- a/doc/source/whatsnew/v0.10.1.rst +++ b/doc/source/whatsnew/v0.10.1.rst @@ -83,6 +83,7 @@ Retrieving unique values in an indexable or data column. You can now store ``datetime64`` in data columns .. ipython:: python + :okwarning: df_mixed = df.copy() df_mixed["datetime64"] = pd.Timestamp("20010102") diff --git a/doc/source/whatsnew/v0.11.0.rst b/doc/source/whatsnew/v0.11.0.rst index 33f83c272b23d..bd19218a4a12a 100644 --- a/doc/source/whatsnew/v0.11.0.rst +++ b/doc/source/whatsnew/v0.11.0.rst @@ -289,6 +289,7 @@ Furthermore ``datetime64[ns]`` columns are created by default, when passed datet (:issue:`2809`, :issue:`2810`) .. ipython:: python + :okwarning: df = pd.DataFrame(np.random.randn(6, 2), pd.date_range('20010102', periods=6), columns=['A', ' B']) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index e9aa5d60314a5..ea6c8b7c74ba0 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -561,6 +561,7 @@ integer dtype for the values. *pandas 1.0.0* .. 
ipython:: python + :okwarning: pd.Series([2, 1, 1, None], dtype="Int64").value_counts().dtype diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 5895a06792ffb..81e7288461207 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -325,6 +325,7 @@ behavior is now consistent with ``unique``, ``isin`` and others (:issue:`42688`). .. ipython:: python + :okwarning: s = pd.Series([True, None, pd.NaT, None, pd.NaT, None]) res = s.value_counts(dropna=False) diff --git a/doc/source/whatsnew/v1.5.2.rst b/doc/source/whatsnew/v1.5.2.rst index 446235d1656dc..35081c5e8a98e 100644 --- a/doc/source/whatsnew/v1.5.2.rst +++ b/doc/source/whatsnew/v1.5.2.rst @@ -33,7 +33,7 @@ Bug fixes Other ~~~~~ -- - Introduced ``FutureWarning`` notifying about a behaviour change in :meth:`DataFrame.value_counts`, :meth:`Series.value_counts`, :meth:`DataFrameGroupBy.value_counts`, :meth:`SeriesGroupBy.value_counts` - the resulting Series will by default be named ``'count'`` (or ``'proportion'`` if ``normalize=True``), and the index will inherit the original object's name, if present (:issue:`49497`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/base.py b/pandas/core/base.py index 46803e1f28975..64e41a0ce43de 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -17,6 +17,7 @@ final, overload, ) +import warnings import numpy as np @@ -37,6 +38,7 @@ cache_readonly, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -991,6 +993,13 @@ def value_counts( NaN 1 dtype: int64 """ + warnings.warn( + "In pandas 2.0.0, the name of the resulting Series will be " + "'count' (or 'proportion' if `normalize=True`), and the index " + "will inherit the original object's name.", + FutureWarning, + stacklevel=find_stack_level(), + ) return value_counts( self, sort=sort, diff --git a/pandas/core/describe.py b/pandas/core/describe.py index 33afbfe6489a6..d321274202a07 100644 --- a/pandas/core/describe.py +++ b/pandas/core/describe.py @@ -17,6 +17,7 @@ Sequence, cast, ) +import warnings import numpy as np @@ -252,7 +253,10 @@ def describe_categorical_1d( Ignored, but in place to unify interface.
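For context, here is a minimal sketch — not part of this diff — of what the warning added in ``pandas/core/base.py`` looks like from user code on this branch (the names asserted are the pre-2.0 defaults that this branch keeps):

```python
import warnings

import pandas as pd

ser = pd.Series([2, 1, 1], name="x")

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    res = ser.value_counts()

# The old behaviour is kept for now: the result is named after the
# original Series and its index is unnamed...
assert res.name == "x"
assert res.index.name is None
# ...but a FutureWarning now announces the pandas 2.0.0 rename to
# 'count' (or 'proportion' when normalize=True).
assert any(issubclass(w.category, FutureWarning) for w in caught)
```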
""" names = ["count", "unique", "top", "freq"] - objcounts = data.value_counts() + with warnings.catch_warnings(): + msg = "In pandas 2.0.0, the name of the resulting Series" + warnings.filterwarnings("ignore", msg, FutureWarning) + objcounts = data.value_counts() count_unique = len(objcounts[objcounts != 0]) if count_unique > 0: top, freq = objcounts.index[0], objcounts.iloc[0] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index efad2edddf360..b4aca624f266c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7035,6 +7035,12 @@ def value_counts( NaN 1 dtype: int64 """ + warnings.warn( + "In pandas 2.0.0, the name of the resulting Series will be " + "'count' (or 'proportion' if `normalize=True`).", + FutureWarning, + stacklevel=find_stack_level(), + ) if subset is None: subset = self.columns.tolist() diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 571559dc838f5..ba2e6228c0713 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -604,6 +604,12 @@ def value_counts( bins=None, dropna: bool = True, ) -> Series: + warnings.warn( + "In pandas 2.0.0, the name of the resulting Series will be " + "'count' (or 'proportion' if `normalize=True`).", + FutureWarning, + stacklevel=find_stack_level(), + ) from pandas.core.reshape.merge import get_join_indexers from pandas.core.reshape.tile import cut @@ -619,13 +625,18 @@ def value_counts( # scalar bins cannot be done at top level # in a backward compatible way # GH38672 relates to categorical dtype - ser = self.apply( - Series.value_counts, - normalize=normalize, - sort=sort, - ascending=ascending, - bins=bins, - ) + with warnings.catch_warnings(): + # The warning has already been emitted above, + # no need to re-emit it for each group. + msg = "In pandas 2.0.0, the name of the resulting Series" + warnings.filterwarnings("ignore", msg, FutureWarning) + ser = self.apply( + Series.value_counts, + normalize=normalize, + sort=sort, + ascending=ascending, + bins=bins, + ) ser.index.names = names return ser @@ -1976,6 +1987,14 @@ def value_counts( 3 male low US 0.25 4 male medium FR 0.25 """ + if self.as_index: + warnings.warn( + "In pandas 2.0.0, the name of the resulting Series will be " + "'count' (or 'proportion' if `normalize=True`).", + FutureWarning, + stacklevel=find_stack_level(), + ) + if self.axis == 1: raise NotImplementedError( "DataFrameGroupBy.value_counts only handles axis=0" diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index 5e87db93cf56c..93636a6aa6c6a 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -13,6 +13,7 @@ Mapping, Sequence, ) +import warnings from pandas._config import get_option @@ -1097,5 +1098,9 @@ def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]: """ Create mapping between datatypes and their number of occurrences. """ - # groupby dtype.name to collect e.g. Categorical columns - return df.dtypes.value_counts().groupby(lambda x: x.name).sum() + with warnings.catch_warnings(): + # This warning is emitted on all calls - can remove it in 2.0.0 + msg = "In pandas 2.0.0, the name of the resulting Series" + warnings.filterwarnings("ignore", msg, FutureWarning) + # groupby dtype.name to collect e.g. 
Categorical columns + return df.dtypes.value_counts().groupby(lambda x: x.name).sum() diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index 8e9112b531fad..edfbf992d7e27 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -104,7 +104,8 @@ def test_value_counts_na(): def test_value_counts_with_normalize(): ser = pd.Series([True, False, pd.NA], dtype="boolean") - result = ser.value_counts(normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = ser.value_counts(normalize=True) expected = pd.Series([1, 1], index=ser[:-1], dtype="Float64") / 2 assert expected.index.dtype == "boolean" tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py index fbdf419811e24..a4657473397f0 100644 --- a/pandas/tests/arrays/floating/test_function.py +++ b/pandas/tests/arrays/floating/test_function.py @@ -119,7 +119,8 @@ def test_value_counts_empty(): def test_value_counts_with_normalize(): ser = pd.Series([0.1, 0.2, 0.1, pd.NA], dtype="Float64") - result = ser.value_counts(normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = ser.value_counts(normalize=True) expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3 assert expected.index.dtype == ser.dtype tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index 73c8d4e6b1aed..94b4e2c09d566 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -123,7 +123,8 @@ def test_value_counts_na(): def test_value_counts_empty(): # https://github.com/pandas-dev/pandas/issues/33317 ser = pd.Series([], dtype="Int64") - result = ser.value_counts() + with tm.assert_produces_warning(FutureWarning, match="name of the result"): + result = ser.value_counts() idx = pd.Index([], dtype=ser.dtype) assert idx.dtype == ser.dtype expected = pd.Series([], index=idx, dtype="Int64") @@ -133,7 +134,8 @@ def test_value_counts_empty(): def test_value_counts_with_normalize(): # GH 33172 ser = pd.Series([1, 2, 1, pd.NA], dtype="Int64") - result = ser.value_counts(normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = ser.value_counts(normalize=True) expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3 assert expected.index.dtype == ser.dtype tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index e321e8da15a6e..b821485cbb927 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -494,7 +494,8 @@ def test_value_counts_na(dtype): def test_value_counts_with_normalize(dtype): ser = pd.Series(["a", "b", "a", pd.NA], dtype=dtype) - result = ser.value_counts(normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = ser.value_counts(normalize=True) expected = pd.Series([2, 1], index=ser[:2], dtype="Float64") / 3 tm.assert_series_equal(result, expected) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index dafbd9fee1b8e..90c228433bdb2 100644 --- a/pandas/tests/base/test_value_counts.py +++ 
b/pandas/tests/base/test_value_counts.py @@ -24,7 +24,8 @@ def test_value_counts(index_or_series_obj): obj = index_or_series_obj obj = np.repeat(obj, range(1, len(obj) + 1)) - result = obj.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = obj.value_counts() counter = collections.Counter(obj) expected = Series(dict(counter.most_common()), dtype=np.int64, name=obj.name) @@ -75,7 +76,8 @@ def test_value_counts_null(null_obj, index_or_series_obj): expected = Series(dict(counter.most_common()), dtype=np.int64) expected.index = expected.index.astype(obj.dtype) - result = obj.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = obj.value_counts() if obj.duplicated().any(): # TODO(GH#32514): # Order of entries with the same count is inconsistent on CI (gh-32449) @@ -97,7 +99,8 @@ def test_value_counts_null(null_obj, index_or_series_obj): expected[null_obj] = 3 - result = obj.value_counts(dropna=False) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = obj.value_counts(dropna=False) if obj.duplicated().any(): # TODO(GH#32514): # Order of entries with the same count is inconsistent on CI (gh-32449) @@ -119,7 +122,8 @@ def test_value_counts_inferred(index_or_series): s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"] s = klass(s_values) expected = Series([4, 3, 2, 1], index=["b", "a", "d", "c"]) - tm.assert_series_equal(s.value_counts(), expected) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + tm.assert_series_equal(s.value_counts(), expected) if isinstance(s, Index): exp = Index(np.unique(np.array(s_values, dtype=np.object_))) @@ -131,17 +135,20 @@ def test_value_counts_inferred(index_or_series): assert s.nunique() == 4 # don't sort, have to sort after the fact as not sorting is # platform-dep - hist = s.value_counts(sort=False).sort_values() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + hist = s.value_counts(sort=False).sort_values() expected = Series([3, 1, 4, 2], index=list("acbd")).sort_values() tm.assert_series_equal(hist, expected) # sort ascending - hist = s.value_counts(ascending=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + hist = s.value_counts(ascending=True) expected = Series([1, 2, 3, 4], index=list("cdab")) tm.assert_series_equal(hist, expected) # relative histogram. 
- hist = s.value_counts(normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + hist = s.value_counts(normalize=True) expected = Series([0.4, 0.3, 0.2, 0.1], index=["b", "a", "d", "c"]) tm.assert_series_equal(hist, expected) @@ -153,14 +160,17 @@ def test_value_counts_bins(index_or_series): # bins msg = "bins argument only works with numeric data" - with pytest.raises(TypeError, match=msg): - s.value_counts(bins=1) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + with pytest.raises(TypeError, match=msg): + s.value_counts(bins=1) s1 = Series([1, 1, 2, 3]) - res1 = s1.value_counts(bins=1) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + res1 = s1.value_counts(bins=1) exp1 = Series({Interval(0.997, 3.0): 4}) tm.assert_series_equal(res1, exp1) - res1n = s1.value_counts(bins=1, normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + res1n = s1.value_counts(bins=1, normalize=True) exp1n = Series({Interval(0.997, 3.0): 1.0}) tm.assert_series_equal(res1n, exp1n) @@ -173,17 +183,20 @@ def test_value_counts_bins(index_or_series): assert s1.nunique() == 3 # these return the same - res4 = s1.value_counts(bins=4, dropna=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + res4 = s1.value_counts(bins=4, dropna=True) intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 1, 3, 2])) tm.assert_series_equal(res4, exp4) - res4 = s1.value_counts(bins=4, dropna=False) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + res4 = s1.value_counts(bins=4, dropna=False) intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 1, 3, 2])) tm.assert_series_equal(res4, exp4) - res4n = s1.value_counts(bins=4, normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + res4n = s1.value_counts(bins=4, normalize=True) exp4n = Series([0.5, 0.25, 0.25, 0], index=intervals.take([0, 1, 3, 2])) tm.assert_series_equal(res4n, exp4n) @@ -191,7 +204,8 @@ def test_value_counts_bins(index_or_series): s_values = ["a", "b", "b", "b", np.nan, np.nan, "d", "d", "a", "a", "b"] s = klass(s_values) expected = Series([4, 3, 2], index=["b", "a", "d"]) - tm.assert_series_equal(s.value_counts(), expected) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + tm.assert_series_equal(s.value_counts(), expected) if isinstance(s, Index): exp = Index(["a", "b", np.nan, "d"]) @@ -203,7 +217,8 @@ def test_value_counts_bins(index_or_series): s = klass({}) if klass is dict else klass({}, dtype=object) expected = Series([], dtype=np.int64) - tm.assert_series_equal(s.value_counts(), expected, check_index_type=False) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + tm.assert_series_equal(s.value_counts(), expected, check_index_type=False) # returned dtype differs depending on original if isinstance(s, Index): tm.assert_index_equal(s.unique(), Index([]), exact=False) @@ -241,7 +256,8 @@ def test_value_counts_datetime64(index_or_series): ["2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00"] ) expected_s = Series([3, 2, 1], index=idx) - tm.assert_series_equal(s.value_counts(), expected_s) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the 
name"): + tm.assert_series_equal(s.value_counts(), expected_s) expected = pd.array( np.array( @@ -260,11 +276,13 @@ def test_value_counts_datetime64(index_or_series): s = df["dt"].copy() s = klass(list(s.values) + [pd.NaT] * 4) - result = s.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = s.value_counts() assert result.index.dtype == "datetime64[ns]" tm.assert_series_equal(result, expected_s) - result = s.value_counts(dropna=False) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = s.value_counts(dropna=False) expected_s = pd.concat([Series([4], index=DatetimeIndex([pd.NaT])), expected_s]) tm.assert_series_equal(result, expected_s) @@ -287,7 +305,8 @@ def test_value_counts_datetime64(index_or_series): td = df.dt - df.dt + timedelta(1) td = klass(td, name="dt") - result = td.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = td.value_counts() expected_s = Series([6], index=[Timedelta("1day")], name="dt") tm.assert_series_equal(result, expected_s) @@ -299,7 +318,8 @@ def test_value_counts_datetime64(index_or_series): td2 = timedelta(1) + (df.dt - df.dt) td2 = klass(td2, name="dt") - result2 = td2.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result2 = td2.value_counts() tm.assert_series_equal(result2, expected_s) @@ -309,7 +329,8 @@ def test_value_counts_with_nan(dropna, index_or_series): klass = index_or_series values = [True, pd.NA, np.nan] obj = klass(values) - res = obj.value_counts(dropna=dropna) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + res = obj.value_counts(dropna=dropna) if dropna is True: expected = Series([1], index=Index([True], dtype=obj.dtype)) else: diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 2df410dff2b00..2b50e8e7f490a 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -1,5 +1,6 @@ import inspect import operator +import warnings import numpy as np import pytest @@ -32,8 +33,16 @@ def test_value_counts(self, all_data, dropna): else: other = all_data - result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() - expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + # TODO pytest.mark.filterwarnings doesn't seem to work if there's + # a tm.maybe_produces_warning as well - for now, we can catch the + # warnings like this, but it'd be good to update the pandas testing + # machinery to be able to combine pytest.mark.filterwarnings and + # tm.maybe_produces_warning + with warnings.catch_warnings(): + msg = "In pandas 2.0.0, the name of the resulting Series" + warnings.filterwarnings("ignore", msg, FutureWarning) + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() self.assert_series_equal(result, expected) @@ -43,7 +52,14 @@ def test_value_counts_with_normalize(self, data): values = np.array(data[~data.isna()]) ser = pd.Series(data, dtype=data.dtype) - result = ser.value_counts(normalize=True).sort_index() + # PerformanceWarning may be raised if pyarrow version is less + # than 7.0.0. The subclass uses pytest.mark.filterwarnings to + # silence it, but that doesn't work with tm.assert_produces_warning, + # so we need to catch that here as well. 
+ with warnings.catch_warnings(): + msg = "In pandas 2.0.0, the name of the resulting Series" + warnings.filterwarnings("ignore", msg, FutureWarning) + result = ser.value_counts(normalize=True).sort_index() if not isinstance(data, pd.Categorical): expected = pd.Series([1 / len(values)] * len(values), index=result.index) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index acba1bd557351..a65c78a2689fb 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -140,8 +140,9 @@ def test_value_counts(self, all_data, dropna, request): else: other = all_data - vcs = pd.Series(all_data).value_counts(dropna=dropna) - vcs_ex = pd.Series(other).value_counts(dropna=dropna) + with tm.assert_produces_warning(FutureWarning, match="name of the result"): + vcs = pd.Series(all_data).value_counts(dropna=dropna) + vcs_ex = pd.Series(other).value_counts(dropna=dropna) with decimal.localcontext() as ctx: # avoid raising when comparing Decimal("NAN") < Decimal(2) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 8cbd4342ea13f..f646b66eb3fdb 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -14,6 +14,7 @@ """ import string +import warnings import numpy as np import pytest @@ -257,14 +258,25 @@ def test_value_counts(self, all_data, dropna, request): and getattr(all_data.dtype, "storage", "") == "pyarrow" and not (dropna and "data_missing" in request.node.nodeid), ): - result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + # TODO pytest.mark.filterwarnings doesn't seem to work if there's + # a tm.maybe_produces_warning as well - for now, we can catch the + # warnings like this, but it'd be good to update the pandas testing + # machinery to be able to combine pytest.mark.filterwarnings and + # tm.maybe_produces_warning + with warnings.catch_warnings(): + msg = "In pandas 2.0.0, the name of the resulting Series" + warnings.filterwarnings("ignore", msg, FutureWarning) + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() with tm.maybe_produces_warning( PerformanceWarning, pa_version_under7p0 and getattr(other.dtype, "storage", "") == "pyarrow" and not (dropna and "data_missing" in request.node.nodeid), ): - expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + with warnings.catch_warnings(): + msg = "In pandas 2.0.0, the name of the resulting Series" + warnings.filterwarnings("ignore", msg, FutureWarning) + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() self.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index 07eacb5e89e3a..51076041d6c38 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -48,7 +48,8 @@ def test_asfreq2(self, frame_or_series): if frame_or_series is Series: daily_ts = ts.asfreq("D", fill_value=-1) - result = daily_ts.value_counts().sort_index() + with tm.assert_produces_warning(FutureWarning, match="name of the result"): + result = daily_ts.value_counts().sort_index() expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py index 6e8528845ea6b..a8d5ca72723cc 100644 --- 
a/pandas/tests/frame/methods/test_value_counts.py +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -10,7 +10,8 @@ def test_data_frame_value_counts_unsorted(): index=["falcon", "dog", "cat", "ant"], ) - result = df.value_counts(sort=False) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts(sort=False) expected = pd.Series( data=[1, 2, 1], index=pd.MultiIndex.from_arrays( @@ -27,7 +28,8 @@ def test_data_frame_value_counts_ascending(): index=["falcon", "dog", "cat", "ant"], ) - result = df.value_counts(ascending=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts(ascending=True) expected = pd.Series( data=[1, 1, 2], index=pd.MultiIndex.from_arrays( @@ -44,7 +46,8 @@ def test_data_frame_value_counts_default(): index=["falcon", "dog", "cat", "ant"], ) - result = df.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts() expected = pd.Series( data=[2, 1, 1], index=pd.MultiIndex.from_arrays( @@ -61,7 +64,8 @@ def test_data_frame_value_counts_normalize(): index=["falcon", "dog", "cat", "ant"], ) - result = df.value_counts(normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts(normalize=True) expected = pd.Series( data=[0.5, 0.25, 0.25], index=pd.MultiIndex.from_arrays( @@ -75,7 +79,8 @@ def test_data_frame_value_counts_normalize(): def test_data_frame_value_counts_single_col_default(): df = pd.DataFrame({"num_legs": [2, 4, 4, 6]}) - result = df.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts() expected = pd.Series( data=[2, 1, 1], index=pd.MultiIndex.from_arrays([[4, 2, 6]], names=["num_legs"]), @@ -87,7 +92,8 @@ def test_data_frame_value_counts_single_col_default(): def test_data_frame_value_counts_empty(): df_no_cols = pd.DataFrame() - result = df_no_cols.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df_no_cols.value_counts() expected = pd.Series([], dtype=np.int64) tm.assert_series_equal(result, expected) @@ -96,7 +102,8 @@ def test_data_frame_value_counts_empty(): def test_data_frame_value_counts_empty_normalize(): df_no_cols = pd.DataFrame() - result = df_no_cols.value_counts(normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df_no_cols.value_counts(normalize=True) expected = pd.Series([], dtype=np.float64) tm.assert_series_equal(result, expected) @@ -110,7 +117,8 @@ def test_data_frame_value_counts_dropna_true(nulls_fixture): "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"], }, ) - result = df.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts() expected = pd.Series( data=[1, 1], index=pd.MultiIndex.from_arrays( @@ -130,7 +138,8 @@ def test_data_frame_value_counts_dropna_false(nulls_fixture): }, ) - result = df.value_counts(dropna=False) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts(dropna=False) expected = pd.Series( data=[1, 1, 1, 1], index=pd.MultiIndex( diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 1ab20c282b23a..879f05209f74b 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -218,7 
+218,8 @@ def test_with_datetimelikes(self): ) t = df.T - result = t.dtypes.value_counts() + with tm.assert_produces_warning(FutureWarning, match="name of the result"): + result = t.dtypes.value_counts() expected = Series({np.dtype("object"): 10}) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 822e761f63ae8..08904dae6e46d 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -443,7 +443,8 @@ def test_df_flex_cmp_constant_return_types(self, opname): df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) const = 2 - result = getattr(df, opname)(const).dtypes.value_counts() + with tm.assert_produces_warning(FutureWarning, match="name of the result"): + result = getattr(df, opname)(const).dtypes.value_counts() tm.assert_series_equal(result, Series([2], index=[np.dtype(bool)])) @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) @@ -453,7 +454,8 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): const = 2 empty = df.iloc[:0] - result = getattr(empty, opname)(const).dtypes.value_counts() + with tm.assert_produces_warning(FutureWarning, match="name of the result"): + result = getattr(empty, opname)(const).dtypes.value_counts() tm.assert_series_equal(result, Series([2], index=[np.dtype(bool)])) def test_df_flex_cmp_ea_dtype_with_ndarray_series(self): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 2952db7febea1..123cdc628c718 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2367,8 +2367,9 @@ def test_construct_with_two_categoricalindex_series(self): def test_constructor_series_nonexact_categoricalindex(self): # GH 42424 ser = Series(range(0, 100)) - ser1 = cut(ser, 10).value_counts().head(5) - ser2 = cut(ser, 10).value_counts().tail(5) + with tm.assert_produces_warning(FutureWarning, match="name of the result"): + ser1 = cut(ser, 10).value_counts().head(5) + ser2 = cut(ser, 10).value_counts().tail(5) result = DataFrame({"1": ser1, "2": ser2}) index = CategoricalIndex( [ diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index fa51a291bf2f7..8bfb92730b481 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -855,7 +855,8 @@ def test_apply_series_return_dataframe_groups(): ) def most_common_values(df): - return Series({c: s.value_counts().index[0] for c, s in df.items()}) + with tm.assert_produces_warning(FutureWarning, match="name of the resulting"): + return Series({c: s.value_counts().index[0] for c, s in df.items()}) result = tdf.groupby("day").apply(most_common_values)["userId"] expected = Series( diff --git a/pandas/tests/groupby/test_frame_value_counts.py b/pandas/tests/groupby/test_frame_value_counts.py index 8255fbab40dce..ac3443ef47222 100644 --- a/pandas/tests/groupby/test_frame_value_counts.py +++ b/pandas/tests/groupby/test_frame_value_counts.py @@ -10,6 +10,11 @@ ) import pandas._testing as tm +VALUE_COUNTS_NAME_MSG = ( + r"In pandas 2.0.0, the name of the resulting Series will be 'count' " + r"\(or 'proportion' if `normalize=True`\)" +) + @pytest.fixture def education_df(): @@ -24,21 +29,24 @@ def education_df(): def test_axis(education_df): gp = education_df.groupby("country", axis=1) - with pytest.raises(NotImplementedError, match="axis"): - gp.value_counts() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): 
+ with pytest.raises(NotImplementedError, match="axis"): + gp.value_counts() def test_bad_subset(education_df): gp = education_df.groupby("country") - with pytest.raises(ValueError, match="subset"): - gp.value_counts(subset=["country"]) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + with pytest.raises(ValueError, match="subset"): + gp.value_counts(subset=["country"]) def test_basic(education_df): # gh43564 - result = education_df.groupby("country")[["gender", "education"]].value_counts( - normalize=True - ) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = education_df.groupby("country")[["gender", "education"]].value_counts( + normalize=True + ) expected = Series( data=[0.5, 0.25, 0.25, 0.5, 0.5], index=MultiIndex.from_tuples( @@ -69,10 +77,10 @@ def _frame_value_counts(df, keys, normalize, sort, ascending): (True, False), ], ) -@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize("as_index, warning", ([True, FutureWarning], [False, None])) @pytest.mark.parametrize("frame", [True, False]) def test_against_frame_and_seriesgroupby( - education_df, groupby, normalize, sort, ascending, as_index, frame + education_df, groupby, normalize, sort, ascending, as_index, warning, frame ): # test all parameters: # - Use column, array or function as by= parameter @@ -89,14 +97,16 @@ def test_against_frame_and_seriesgroupby( }[groupby] gp = education_df.groupby(by=by, as_index=as_index) - result = gp[["gender", "education"]].value_counts( - normalize=normalize, sort=sort, ascending=ascending - ) + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + result = gp[["gender", "education"]].value_counts( + normalize=normalize, sort=sort, ascending=ascending + ) if frame: # compare against apply with DataFrame value_counts - expected = gp.apply( - _frame_value_counts, ["gender", "education"], normalize, sort, ascending - ) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + expected = gp.apply( + _frame_value_counts, ["gender", "education"], normalize, sort, ascending + ) if as_index: tm.assert_series_equal(result, expected) @@ -114,9 +124,10 @@ def test_against_frame_and_seriesgroupby( else: # compare against SeriesGroupBy value_counts education_df["both"] = education_df["gender"] + "-" + education_df["education"] - expected = gp["both"].value_counts( - normalize=normalize, sort=sort, ascending=ascending - ) + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + expected = gp["both"].value_counts( + normalize=normalize, sort=sort, ascending=ascending + ) expected.name = None if as_index: index_frame = expected.index.to_frame(index=False) @@ -189,9 +200,10 @@ def test_data_frame_value_counts( ): # 3-way compare with :meth:`~DataFrame.value_counts` # Tests from frame/methods/test_value_counts.py - result_frame = animals_df.value_counts( - sort=sort, ascending=ascending, normalize=normalize - ) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result_frame = animals_df.value_counts( + sort=sort, ascending=ascending, normalize=normalize + ) expected = Series( data=expected_data, index=MultiIndex.from_arrays( @@ -200,9 +212,10 @@ def test_data_frame_value_counts( ) tm.assert_series_equal(result_frame, expected) - result_frame_groupby = animals_df.groupby("key").value_counts( - sort=sort, ascending=ascending, normalize=normalize - ) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + 
result_frame_groupby = animals_df.groupby("key").value_counts( + sort=sort, ascending=ascending, normalize=normalize + ) tm.assert_series_equal(result_frame_groupby, expected) @@ -238,7 +251,8 @@ def test_dropna_combinations( nulls_df, group_dropna, count_dropna, expected_rows, expected_values ): gp = nulls_df.groupby(["A", "B"], dropna=group_dropna) - result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna) columns = DataFrame() for column in nulls_df.columns: columns[column] = [nulls_df[column][row] for row in expected_rows] @@ -291,7 +305,10 @@ def test_data_frame_value_counts_dropna( # GH 41334 # 3-way compare with :meth:`~DataFrame.value_counts` # Tests with nulls from frame/methods/test_value_counts.py - result_frame = names_with_nulls_df.value_counts(dropna=dropna, normalize=normalize) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result_frame = names_with_nulls_df.value_counts( + dropna=dropna, normalize=normalize + ) expected = Series( data=expected_data, index=expected_index, @@ -301,14 +318,15 @@ def test_data_frame_value_counts_dropna( tm.assert_series_equal(result_frame, expected) - result_frame_groupby = names_with_nulls_df.groupby("key").value_counts( - dropna=dropna, normalize=normalize - ) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result_frame_groupby = names_with_nulls_df.groupby("key").value_counts( + dropna=dropna, normalize=normalize + ) tm.assert_series_equal(result_frame_groupby, expected) -@pytest.mark.parametrize("as_index", [False, True]) +@pytest.mark.parametrize("as_index, warning", ([(False, None), (True, FutureWarning)])) @pytest.mark.parametrize("observed", [False, True]) @pytest.mark.parametrize( "normalize, expected_data", @@ -321,7 +339,7 @@ def test_data_frame_value_counts_dropna( ], ) def test_categorical_single_grouper_with_only_observed_categories( - education_df, as_index, observed, normalize, expected_data + education_df, as_index, warning, observed, normalize, expected_data ): # Test single categorical grouper with only observed grouping categories @@ -330,7 +348,8 @@ def test_categorical_single_grouper_with_only_observed_categories( gp = education_df.astype("category").groupby( "country", as_index=as_index, observed=observed ) - result = gp.value_counts(normalize=normalize) + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + result = gp.value_counts(normalize=normalize) expected_index = MultiIndex.from_tuples( [ @@ -369,7 +388,7 @@ def test_categorical_single_grouper_with_only_observed_categories( def assert_categorical_single_grouper( - education_df, as_index, observed, expected_index, normalize, expected_data + education_df, as_index, warning, observed, expected_index, normalize, expected_data ): # Test single categorical grouper when non-groupers are also categorical education_df = education_df.copy().astype("category") @@ -378,7 +397,8 @@ def assert_categorical_single_grouper( education_df["country"] = education_df["country"].cat.add_categories(["ASIA"]) gp = education_df.groupby("country", as_index=as_index, observed=observed) - result = gp.value_counts(normalize=normalize) + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + result = gp.value_counts(normalize=normalize) expected_series = Series( data=expected_data, @@ -404,7 +424,7 @@ def 
assert_categorical_single_grouper( tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize("as_index, warning", ([True, FutureWarning], [False, None])) @pytest.mark.parametrize( "normalize, expected_data", [ @@ -416,7 +436,7 @@ def assert_categorical_single_grouper( ], ) def test_categorical_single_grouper_observed_true( - education_df, as_index, normalize, expected_data + education_df, as_index, warning, normalize, expected_data ): # GH#46357 @@ -442,10 +462,11 @@ def test_categorical_single_grouper_observed_true( expected_index=expected_index, normalize=normalize, expected_data=expected_data, + warning=warning, ) -@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize("as_index, warning", ([True, FutureWarning], [False, None])) @pytest.mark.parametrize( "normalize, expected_data", [ @@ -483,7 +504,7 @@ def test_categorical_single_grouper_observed_true( ], ) def test_categorical_single_grouper_observed_false( - education_df, as_index, normalize, expected_data + education_df, as_index, warning, normalize, expected_data ): # GH#46357 @@ -511,6 +532,7 @@ def test_categorical_single_grouper_observed_false( assert_categorical_single_grouper( education_df=education_df, as_index=as_index, + warning=warning, observed=False, expected_index=expected_index, normalize=normalize, @@ -518,7 +540,7 @@ def test_categorical_single_grouper_observed_false( ) -@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize("as_index, warning", ([True, FutureWarning], [False, None])) @pytest.mark.parametrize( "observed, expected_index", [ @@ -563,7 +585,7 @@ def test_categorical_single_grouper_observed_false( ], ) def test_categorical_multiple_groupers( - education_df, as_index, observed, expected_index, normalize, expected_data + education_df, as_index, warning, observed, expected_index, normalize, expected_data ): # GH#46357 @@ -575,7 +597,8 @@ def test_categorical_multiple_groupers( gp = education_df.groupby( ["country", "education"], as_index=as_index, observed=observed ) - result = gp.value_counts(normalize=normalize) + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + result = gp.value_counts(normalize=normalize) expected_series = Series( data=expected_data[expected_data > 0.0] if observed else expected_data, @@ -598,7 +621,7 @@ def test_categorical_multiple_groupers( tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("as_index", [False, True]) +@pytest.mark.parametrize("as_index, warning", ([True, FutureWarning], [False, None])) @pytest.mark.parametrize("observed", [False, True]) @pytest.mark.parametrize( "normalize, expected_data", @@ -612,7 +635,7 @@ def test_categorical_multiple_groupers( ], ) def test_categorical_non_groupers( - education_df, as_index, observed, normalize, expected_data + education_df, as_index, warning, observed, normalize, expected_data ): # GH#46357 Test non-observed categories are included in the result, # regardless of `observed` @@ -621,7 +644,8 @@ def test_categorical_non_groupers( education_df["education"] = education_df["education"].astype("category") gp = education_df.groupby("country", as_index=as_index, observed=observed) - result = gp.value_counts(normalize=normalize) + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + result = gp.value_counts(normalize=normalize) expected_index = [ ("FR", "male", "low"), @@ -689,13 +713,14 @@ def test_mixed_groupings(normalize, expected_label, expected_values): ("level", 
list("abcd") + ["level_1"], ["a", None, "d", "b", "c", "level_1"]), ], ) -@pytest.mark.parametrize("as_index", [False, True]) -def test_column_label_duplicates(test, columns, expected_names, as_index): +@pytest.mark.parametrize("as_index, warning", ([True, FutureWarning], [False, None])) +def test_column_label_duplicates(test, columns, expected_names, as_index, warning): # GH 44992 # Test for duplicate input column labels and generated duplicate labels df = DataFrame([[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]], columns=columns) expected_data = [(1, 0, 7, 3, 5, 9), (2, 1, 8, 4, 6, 10)] - result = df.groupby(["a", [0, 1], "d"], as_index=as_index).value_counts() + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + result = df.groupby(["a", [0, 1], "d"], as_index=as_index).value_counts() if as_index: expected = Series( data=(1, 1), @@ -735,7 +760,8 @@ def test_ambiguous_grouping(): # Test that groupby is not confused by groupings length equal to row count df = DataFrame({"a": [1, 1]}) gb = df.groupby([1, 1]) - result = gb.value_counts() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = gb.value_counts() expected = Series([2], index=MultiIndex.from_tuples([[1, 1]], names=[None, "a"])) tm.assert_series_equal(result, expected) @@ -744,22 +770,25 @@ def test_subset_overlaps_gb_key_raises(): # GH 46383 df = DataFrame({"c1": ["a", "b", "c"], "c2": ["x", "y", "y"]}, index=[0, 1, 1]) msg = "Keys {'c1'} in subset cannot be in the groupby column keys." - with pytest.raises(ValueError, match=msg): - df.groupby("c1").value_counts(subset=["c1"]) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + with pytest.raises(ValueError, match=msg): + df.groupby("c1").value_counts(subset=["c1"]) def test_subset_doesnt_exist_in_frame(): # GH 46383 df = DataFrame({"c1": ["a", "b", "c"], "c2": ["x", "y", "y"]}, index=[0, 1, 1]) msg = "Keys {'c3'} in subset do not exist in the DataFrame." 
- with pytest.raises(ValueError, match=msg): - df.groupby("c1").value_counts(subset=["c3"]) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + with pytest.raises(ValueError, match=msg): + df.groupby("c1").value_counts(subset=["c3"]) def test_subset(): # GH 46383 df = DataFrame({"c1": ["a", "b", "c"], "c2": ["x", "y", "y"]}, index=[0, 1, 1]) - result = df.groupby(level=0).value_counts(subset=["c2"]) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = df.groupby(level=0).value_counts(subset=["c2"]) expected = Series( [1, 2], index=MultiIndex.from_arrays([[0, 1], ["x", "y"]], names=[None, "c2"]) ) @@ -773,7 +802,8 @@ def test_subset_duplicate_columns(): index=[0, 1, 1], columns=["c1", "c2", "c2"], ) - result = df.groupby(level=0).value_counts(subset=["c2"]) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = df.groupby(level=0).value_counts(subset=["c2"]) expected = Series( [1, 2], index=MultiIndex.from_arrays( diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 577a72d3f5090..f83fd22a6b4ad 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -21,6 +21,11 @@ ) import pandas._testing as tm +VALUE_COUNTS_NAME_MSG = ( + r"In pandas 2.0.0, the name of the resulting Series will be 'count' " + r"\(or 'proportion' if `normalize=True`\)" +) + def tests_value_counts_index_names_category_column(): # GH44324 Missing name of index category column @@ -31,7 +36,8 @@ def tests_value_counts_index_names_category_column(): } ) df["gender"] = df["gender"].astype("category") - result = df.groupby("country")["gender"].value_counts() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = df.groupby("country")["gender"].value_counts() # Construct expected, very specific multiindex df_mi_expected = DataFrame([["US", "female"]], columns=["country", "gender"]) @@ -103,10 +109,12 @@ def rebuild_index(df): } gr = df.groupby(keys, sort=isort) - left = gr["3rd"].value_counts(**kwargs) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + left = gr["3rd"].value_counts(**kwargs) gr = df.groupby(keys, sort=isort) - right = gr["3rd"].apply(Series.value_counts, **kwargs) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + right = gr["3rd"].apply(Series.value_counts, **kwargs) right.index.names = right.index.names[:-1] + ["3rd"] # have to sort on index because of unstable sort on values @@ -135,8 +143,9 @@ def test_series_groupby_value_counts_with_grouper(): dfg = df.groupby(Grouper(freq="1D", key="Datetime")) # have to sort on index because of unstable sort on values xref GH9212 - result = dfg["Food"].value_counts().sort_index() - expected = dfg["Food"].apply(Series.value_counts).sort_index() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = dfg["Food"].value_counts().sort_index() + expected = dfg["Food"].apply(Series.value_counts).sort_index() expected.index.names = result.index.names tm.assert_series_equal(result, expected) @@ -148,7 +157,8 @@ def test_series_groupby_value_counts_empty(columns): df = DataFrame(columns=columns) dfg = df.groupby(columns[:-1]) - result = dfg[columns[-1]].value_counts() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = dfg[columns[-1]].value_counts() expected = Series([], name=columns[-1], dtype=result.dtype) 
expected.index = MultiIndex.from_arrays([[]] * len(columns), names=columns) @@ -161,8 +171,9 @@ def test_series_groupby_value_counts_one_row(columns): df = DataFrame(data=[range(len(columns))], columns=columns) dfg = df.groupby(columns[:-1]) - result = dfg[columns[-1]].value_counts() - expected = df.value_counts().rename(columns[-1]) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = dfg[columns[-1]].value_counts() + expected = df.value_counts().rename(columns[-1]) tm.assert_series_equal(result, expected) @@ -171,7 +182,8 @@ def test_series_groupby_value_counts_on_categorical(): # GH38672 s = Series(Categorical(["a"], categories=["a", "b"])) - result = s.groupby([0]).value_counts() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = s.groupby([0]).value_counts() expected = Series( data=[1, 0], diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py index a7d57eee7e5a1..485dff9dd3506 100644 --- a/pandas/tests/series/methods/test_value_counts.py +++ b/pandas/tests/series/methods/test_value_counts.py @@ -9,6 +9,11 @@ ) import pandas._testing as tm +VALUE_COUNTS_NAME_MSG = ( + r"In pandas 2.0.0, the name of the resulting Series will be 'count' " + r"\(or 'proportion' if `normalize=True`\)" +) + class TestSeriesValueCounts: def test_value_counts_datetime(self): @@ -28,15 +33,18 @@ def test_value_counts_datetime(self): exp = Series([3, 2, 1], index=exp_idx, name="xxx") ser = Series(values, name="xxx") - tm.assert_series_equal(ser.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(), exp) # check DatetimeIndex outputs the same result idx = pd.DatetimeIndex(values, name="xxx") - tm.assert_series_equal(idx.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(idx.value_counts(), exp) # normalize exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") - tm.assert_series_equal(ser.value_counts(normalize=True), exp) - tm.assert_series_equal(idx.value_counts(normalize=True), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) def test_value_counts_datetime_tz(self): values = [ @@ -55,13 +63,16 @@ def test_value_counts_datetime_tz(self): exp = Series([3, 2, 1], index=exp_idx, name="xxx") ser = Series(values, name="xxx") - tm.assert_series_equal(ser.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(), exp) idx = pd.DatetimeIndex(values, name="xxx") - tm.assert_series_equal(idx.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(idx.value_counts(), exp) exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") - tm.assert_series_equal(ser.value_counts(normalize=True), exp) - tm.assert_series_equal(idx.value_counts(normalize=True), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) def test_value_counts_period(self): values = [ @@ -77,15 +88,18 @@ def test_value_counts_period(self): exp = Series([3, 2, 1], 
index=exp_idx, name="xxx") ser = Series(values, name="xxx") - tm.assert_series_equal(ser.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(), exp) # check DatetimeIndex outputs the same result idx = pd.PeriodIndex(values, name="xxx") - tm.assert_series_equal(idx.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(idx.value_counts(), exp) # normalize exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") - tm.assert_series_equal(ser.value_counts(normalize=True), exp) - tm.assert_series_equal(idx.value_counts(normalize=True), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) def test_value_counts_categorical_ordered(self): # most dtypes are tested in tests/base @@ -95,15 +109,18 @@ def test_value_counts_categorical_ordered(self): exp = Series([3, 2, 1], index=exp_idx, name="xxx") ser = Series(values, name="xxx") - tm.assert_series_equal(ser.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(), exp) # check CategoricalIndex outputs the same result idx = CategoricalIndex(values, name="xxx") - tm.assert_series_equal(idx.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(idx.value_counts(), exp) # normalize exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") - tm.assert_series_equal(ser.value_counts(normalize=True), exp) - tm.assert_series_equal(idx.value_counts(normalize=True), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) def test_value_counts_categorical_not_ordered(self): values = Categorical([1, 2, 3, 1, 1, 3], ordered=False) @@ -112,27 +129,32 @@ def test_value_counts_categorical_not_ordered(self): exp = Series([3, 2, 1], index=exp_idx, name="xxx") ser = Series(values, name="xxx") - tm.assert_series_equal(ser.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(), exp) # check CategoricalIndex outputs the same result idx = CategoricalIndex(values, name="xxx") - tm.assert_series_equal(idx.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(idx.value_counts(), exp) # normalize exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") - tm.assert_series_equal(ser.value_counts(normalize=True), exp) - tm.assert_series_equal(idx.value_counts(normalize=True), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) def test_value_counts_categorical(self): # GH#12835 cats = Categorical(list("abcccb"), categories=list("cabd")) ser = Series(cats, name="xxx") - res = ser.value_counts(sort=False) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + res = ser.value_counts(sort=False) exp_index = CategoricalIndex(list("cabd"), categories=cats.categories) exp = Series([3, 
         tm.assert_series_equal(res, exp)
 
-        res = ser.value_counts(sort=True)
+        with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG):
+            res = ser.value_counts(sort=True)
 
         exp_index = CategoricalIndex(list("cbad"), categories=cats.categories)
         exp = Series([3, 2, 1, 0], name="xxx", index=exp_index)
@@ -141,7 +163,8 @@ def test_value_counts_categorical(self):
         # check object dtype handles the Series.name as the same
         # (tested in tests/base)
         ser = Series(["a", "b", "c", "c", "c", "b"], name="xxx")
-        res = ser.value_counts()
+        with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG):
+            res = ser.value_counts()
         exp = Series([3, 2, 1], name="xxx", index=["c", "b", "a"])
         tm.assert_series_equal(res, exp)
 
@@ -152,10 +175,12 @@ def test_value_counts_categorical_with_nan(self):
         ser = Series(["a", "b", "a"], dtype="category")
         exp = Series([2, 1], index=CategoricalIndex(["a", "b"]))
 
-        res = ser.value_counts(dropna=True)
+        with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG):
+            res = ser.value_counts(dropna=True)
         tm.assert_series_equal(res, exp)
 
-        res = ser.value_counts(dropna=True)
+        with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG):
+            res = ser.value_counts(dropna=True)
         tm.assert_series_equal(res, exp)
 
         # same Series via two different constructions --> same behaviour
@@ -169,18 +194,21 @@ def test_value_counts_categorical_with_nan(self):
         for ser in series:
             # None is a NaN value, so we exclude its count here
            exp = Series([2, 1], index=CategoricalIndex(["a", "b"]))
-            res = ser.value_counts(dropna=True)
+            with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG):
+                res = ser.value_counts(dropna=True)
             tm.assert_series_equal(res, exp)
 
             # we don't exclude the count of None and sort by counts
             exp = Series([3, 2, 1], index=CategoricalIndex([np.nan, "a", "b"]))
-            res = ser.value_counts(dropna=False)
+            with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG):
+                res = ser.value_counts(dropna=False)
             tm.assert_series_equal(res, exp)
 
             # When we aren't sorting by counts, and np.nan isn't a
             # category, it should be last.
             exp = Series([2, 1, 3], index=CategoricalIndex(["a", "b", np.nan]))
-            res = ser.value_counts(dropna=False, sort=False)
+            with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG):
+                res = ser.value_counts(dropna=False, sort=False)
             tm.assert_series_equal(res, exp)
 
     @pytest.mark.parametrize(
@@ -205,7 +233,8 @@ def test_value_counts_categorical_with_nan(self):
     )
     def test_value_counts_bool_with_nan(self, ser, dropna, exp):
         # GH32146
-        out = ser.value_counts(dropna=dropna)
+        with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG):
+            out = ser.value_counts(dropna=dropna)
         tm.assert_series_equal(out, exp)
 
     @pytest.mark.parametrize(
@@ -223,5 +252,6 @@ def test_value_counts_bool_with_nan(self, ser, dropna, exp):
     )
     def test_value_counts_complex_numbers(self, input_array, expected):
         # GH 17927
-        result = Series(input_array).value_counts()
+        with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG):
+            result = Series(input_array).value_counts()
         tm.assert_series_equal(result, expected)
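Note that the test_algos.py hunks below match the warning with the short literal "name of the result" instead of reusing VALUE_COUNTS_NAME_MSG. Both forms work because assert_produces_warning applies `match` via re.search, so any substring (or regex) of the emitted message is accepted; the full constant only needs backslashes because parentheses are regex metacharacters. A self-contained sketch (the message text is copied from the constant added above):

    import re

    msg = (
        "In pandas 2.0.0, the name of the resulting Series will be 'count' "
        "(or 'proportion' if `normalize=True`)"
    )

    # A plain substring is itself a valid regex and matches via re.search.
    assert re.search("name of the result", msg) is not None

    # The parenthesised clause must be escaped to match literally, which
    # is why VALUE_COUNTS_NAME_MSG is written as a raw string with \( \).
    assert re.search(r"\(or 'proportion' if `normalize=True`\)", msg) is not None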
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index b982d247c2707..aa63f1302716f 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -1150,11 +1150,13 @@ def test_value_counts(self):
 
     def test_value_counts_bins(self):
         s = [1, 2, 3, 4]
-        result = algos.value_counts(s, bins=1)
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            result = algos.value_counts(s, bins=1)
         expected = Series([4], index=IntervalIndex.from_tuples([(0.996, 4.0)]))
         tm.assert_series_equal(result, expected)
 
-        result = algos.value_counts(s, bins=2, sort=False)
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            result = algos.value_counts(s, bins=2, sort=False)
         expected = Series(
             [2, 2], index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)])
         )
@@ -1164,7 +1166,8 @@ def test_value_counts_dtypes(self):
         result = algos.value_counts([1, 1.0])
         assert len(result) == 1
 
-        result = algos.value_counts([1, 1.0], bins=1)
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            result = algos.value_counts([1, 1.0], bins=1)
         assert len(result) == 1
 
         result = algos.value_counts(Series([1, 1.0, "1"]))  # object
@@ -1200,7 +1203,8 @@ def test_value_counts_datetime_outofbounds(self):
                 datetime(3000, 1, 1),
             ]
         )
-        res = s.value_counts()
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            res = s.value_counts()
 
         exp_index = Index(
             [datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)],
@@ -1216,27 +1220,31 @@ def test_value_counts_datetime_outofbounds(self):
 
     def test_categorical(self):
         s = Series(Categorical(list("aaabbc")))
-        result = s.value_counts()
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            result = s.value_counts()
         expected = Series([3, 2, 1], index=CategoricalIndex(["a", "b", "c"]))
 
         tm.assert_series_equal(result, expected, check_index_type=True)
 
         # preserve order?
         s = s.cat.as_ordered()
-        result = s.value_counts()
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            result = s.value_counts()
         expected.index = expected.index.as_ordered()
         tm.assert_series_equal(result, expected, check_index_type=True)
 
     def test_categorical_nans(self):
         s = Series(Categorical(list("aaaaabbbcc")))  # 4,3,2,1 (nan)
         s.iloc[1] = np.nan
-        result = s.value_counts()
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            result = s.value_counts()
         expected = Series(
             [4, 3, 2],
             index=CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"]),
         )
         tm.assert_series_equal(result, expected, check_index_type=True)
-        result = s.value_counts(dropna=False)
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            result = s.value_counts(dropna=False)
         expected = Series([4, 3, 2, 1], index=CategoricalIndex(["a", "b", "c", np.nan]))
         tm.assert_series_equal(result, expected, check_index_type=True)
 
@@ -1245,7 +1253,8 @@ def test_categorical_nans(self):
             Categorical(list("aaaaabbbcc"), ordered=True, categories=["b", "a", "c"])
         )
         s.iloc[1] = np.nan
-        result = s.value_counts()
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            result = s.value_counts()
         expected = Series(
             [4, 3, 2],
             index=CategoricalIndex(
@@ -1254,7 +1263,8 @@ def test_categorical_nans(self):
         )
         tm.assert_series_equal(result, expected, check_index_type=True)
 
-        result = s.value_counts(dropna=False)
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            result = s.value_counts(dropna=False)
         expected = Series(
             [4, 3, 2, 1],
             index=CategoricalIndex(
@@ -1266,7 +1276,8 @@ def test_categorical_nans(self):
     def test_categorical_zeroes(self):
         # keep the `d` category with 0
         s = Series(Categorical(list("bbbaac"), categories=list("abcd"), ordered=True))
-        result = s.value_counts()
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            result = s.value_counts()
         expected = Series(
             [3, 2, 1, 0],
             index=Categorical(
@@ -1278,38 +1289,43 @@ def test_categorical_zeroes(self):
 
     def test_dropna(self):
         # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328
-        tm.assert_series_equal(
-            Series([True, True, False]).value_counts(dropna=True),
-            Series([2, 1], index=[True, False]),
-        )
-        tm.assert_series_equal(
-            Series([True, True, False]).value_counts(dropna=False),
-            Series([2, 1], index=[True, False]),
-        )
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            tm.assert_series_equal(
+                Series([True, True, False]).value_counts(dropna=True),
+                Series([2, 1], index=[True, False]),
+            )
+            tm.assert_series_equal(
+                Series([True, True, False]).value_counts(dropna=False),
+                Series([2, 1], index=[True, False]),
+            )
 
-        tm.assert_series_equal(
-            Series([True] * 3 + [False] * 2 + [None] * 5).value_counts(dropna=True),
-            Series([3, 2], index=Index([True, False], dtype=object)),
-        )
-        tm.assert_series_equal(
-            Series([True] * 5 + [False] * 3 + [None] * 2).value_counts(dropna=False),
-            Series([5, 3, 2], index=[True, False, np.nan]),
-        )
-        tm.assert_series_equal(
-            Series([10.3, 5.0, 5.0]).value_counts(dropna=True),
-            Series([2, 1], index=[5.0, 10.3]),
-        )
-        tm.assert_series_equal(
-            Series([10.3, 5.0, 5.0]).value_counts(dropna=False),
-            Series([2, 1], index=[5.0, 10.3]),
-        )
+            tm.assert_series_equal(
+                Series([True] * 3 + [False] * 2 + [None] * 5).value_counts(dropna=True),
+                Series([3, 2], index=Index([True, False], dtype=object)),
+            )
+            tm.assert_series_equal(
+                Series([True] * 5 + [False] * 3 + [None] * 2).value_counts(
+                    dropna=False
+                ),
+                Series([5, 3, 2], index=[True, False, np.nan]),
+            )
+            tm.assert_series_equal(
+                Series([10.3, 5.0, 5.0]).value_counts(dropna=True),
+                Series([2, 1], index=[5.0, 10.3]),
+            )
+            tm.assert_series_equal(
+                Series([10.3, 5.0, 5.0]).value_counts(dropna=False),
+                Series([2, 1], index=[5.0, 10.3]),
+            )
 
-        tm.assert_series_equal(
-            Series([10.3, 5.0, 5.0, None]).value_counts(dropna=True),
-            Series([2, 1], index=[5.0, 10.3]),
-        )
+            tm.assert_series_equal(
+                Series([10.3, 5.0, 5.0, None]).value_counts(dropna=True),
+                Series([2, 1], index=[5.0, 10.3]),
+            )
 
-        result = Series([10.3, 10.3, 5.0, 5.0, 5.0, None]).value_counts(dropna=False)
+            result = Series([10.3, 10.3, 5.0, 5.0, 5.0, None]).value_counts(
+                dropna=False
+            )
         expected = Series([3, 2, 1], index=[5.0, 10.3, np.nan])
         tm.assert_series_equal(result, expected)
 
@@ -1318,13 +1334,15 @@ def test_value_counts_normalized(self, dtype):
         # GH12558
         s = Series([1] * 2 + [2] * 3 + [np.nan] * 5)
         s_typed = s.astype(dtype)
-        result = s_typed.value_counts(normalize=True, dropna=False)
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            result = s_typed.value_counts(normalize=True, dropna=False)
         expected = Series(
             [0.5, 0.3, 0.2], index=Series([np.nan, 2.0, 1.0], dtype=dtype)
         )
         tm.assert_series_equal(result, expected)
 
-        result = s_typed.value_counts(normalize=True, dropna=True)
+        with tm.assert_produces_warning(FutureWarning, match="name of the result"):
+            result = s_typed.value_counts(normalize=True, dropna=True)
         expected = Series([0.6, 0.4], index=Series([2.0, 1.0], dtype=dtype))
         tm.assert_series_equal(result, expected)
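For context, the end state that the warning message announces (and that these tests would be updated to once the deprecation is enforced) gives the result a fixed name instead of propagating the input's. A hypothetical pandas >= 2.0 session, not part of this patch:

    import pandas as pd

    ser = pd.Series(["a", "b", "a"], name="xxx")

    # After the rename, the counts no longer inherit the name "xxx".
    ser.value_counts().name                # "count"
    ser.value_counts(normalize=True).name  # "proportion"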