From daab4620c16468d23afe4b41a15bbfbc33f9cfce Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Dec 2018 16:05:25 -0600 Subject: [PATCH 1/4] COMPAT: Add keepdims and friends to validation xref https://github.com/pandas-dev/pandas/pull/24227 --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/compat/numpy/function.py | 23 +++++++++++++++++--- pandas/core/generic.py | 12 +++++++++-- pandas/tests/series/test_analytics.py | 31 +++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index fe5e4a57c557a..89130844d1004 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1397,6 +1397,7 @@ Numeric - Added ``log10`` to the list of supported functions in :meth:`DataFrame.eval` (:issue:`24139`) - Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) - Checking PEP 3141 numbers in :func:`~pandas.api.types.is_scalar` function returns ``True`` (:issue:`22903`) +- Reduction methods like :meth:`Series.sum` now accept the default value of ``keepdims=False`` when called from a NumPy ufunc, rather than raising a ``TypeError``. Full support for ``keepdims`` has not been implemented (:issue:``). Conversion ^^^^^^^^^^ diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 30fdeca35faf3..417ddd0d8af17 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -189,15 +189,16 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): ALLANY_DEFAULTS = OrderedDict() ALLANY_DEFAULTS['dtype'] = None ALLANY_DEFAULTS['out'] = None +ALLANY_DEFAULTS['keepdims'] = False validate_all = CompatValidator(ALLANY_DEFAULTS, fname='all', method='both', max_fname_arg_count=1) validate_any = CompatValidator(ALLANY_DEFAULTS, fname='any', method='both', max_fname_arg_count=1) -LOGICAL_FUNC_DEFAULTS = dict(out=None) +LOGICAL_FUNC_DEFAULTS = dict(out=None, keepdims=False) validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method='kwargs') -MINMAX_DEFAULTS = dict(out=None) +MINMAX_DEFAULTS = dict(out=None, keepdims=False) validate_min = CompatValidator(MINMAX_DEFAULTS, fname='min', method='both', max_fname_arg_count=1) validate_max = CompatValidator(MINMAX_DEFAULTS, fname='max', @@ -225,16 +226,32 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): STAT_FUNC_DEFAULTS = OrderedDict() STAT_FUNC_DEFAULTS['dtype'] = None STAT_FUNC_DEFAULTS['out'] = None + +PROD_DEFAULTS = SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy() +SUM_DEFAULTS['keepdims'] = False +SUM_DEFAULTS['initial'] = None + +MEDIAN_DEFAULTS = STAT_FUNC_DEFAULTS.copy() +MEDIAN_DEFAULTS['overwrite_input'] = False +MEDIAN_DEFAULTS['keepdims'] = False + +STAT_FUNC_DEFAULTS['keepdims'] = False + validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, method='kwargs') -validate_sum = CompatValidator(STAT_FUNC_DEFAULTS, fname='sort', +validate_sum = CompatValidator(SUM_DEFAULTS, fname='sum', method='both', max_fname_arg_count=1) +validate_prod = CompatValidator(PROD_DEFAULTS, fname="prod", + method="both", max_fname_arg_count=1) validate_mean = CompatValidator(STAT_FUNC_DEFAULTS, fname='mean', method='both', max_fname_arg_count=1) +validate_median = CompatValidator(MEDIAN_DEFAULTS, fname='median', + method='both', max_fname_arg_count=1) STAT_DDOF_FUNC_DEFAULTS = OrderedDict() STAT_DDOF_FUNC_DEFAULTS['dtype'] = None STAT_DDOF_FUNC_DEFAULTS['out'] = None +STAT_DDOF_FUNC_DEFAULTS['keepdims'] = False validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method='kwargs') diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6eb6bc124c80a..c1a53e1e97803 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10834,7 +10834,12 @@ def _make_min_count_stat_function(cls, name, name1, name2, axis_descr, desc, def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, min_count=0, **kwargs): - nv.validate_stat_func(tuple(), kwargs, fname=name) + if name == 'sum': + nv.validate_sum(tuple(), kwargs) + elif name == 'prod': + nv.validate_prod(tuple(), kwargs) + else: + nv.validate_stat_func(tuple(), kwargs, fname=name) if skipna is None: skipna = True if axis is None: @@ -10855,7 +10860,10 @@ def _make_stat_function(cls, name, name1, name2, axis_descr, desc, f, @Appender(_num_doc) def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): - nv.validate_stat_func(tuple(), kwargs, fname=name) + if name == 'median': + nv.validate_median(tuple(), kwargs) + else: + nv.validate_stat_func(tuple(), kwargs, fname=name) if skipna is None: skipna = True if axis is None: diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 81d60aba44b0f..6170c42c466c4 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1641,6 +1641,37 @@ def test_value_counts_categorical_not_ordered(self): tm.assert_series_equal(s.value_counts(normalize=True), exp) tm.assert_series_equal(idx.value_counts(normalize=True), exp) + @pytest.mark.parametrize("func", [np.any, np.all]) + @pytest.mark.parametrize("kwargs", [ + dict(keepdims=True), + dict(out=object()), + ]) + def test_validate_any_all_out_keepdims_raises(self, kwargs, func): + s = pd.Series([1, 2]) + param = list(kwargs)[0] + name = func.__name__ + + msg = "the '{}' parameter .* {}".format(param, name) + with pytest.raises(ValueError, match=msg): + func(s, **kwargs) + + def test_validate_sum_initial(self): + s = pd.Series([1, 2]) + with pytest.raises(ValueError, match="the 'initial' .* sum"): + np.sum(s, initial=10) + + def test_validate_median_initial(self): + s = pd.Series([1, 2]) + with pytest.raises(ValueError, + match="the 'overwrite_input' .* median"): + np.median(s, overwrite_input=True) + + def test_validate_stat_keepdims(self): + s = pd.Series([1, 2]) + with pytest.raises(ValueError, + match="the 'keepdims'"): + np.sum(s, keepdims=True) + main_dtypes = [ 'datetime', From 0b8d2b96dea31a3e1c5fda46b1515042693c80d7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 11:29:37 -0600 Subject: [PATCH 2/4] PR number --- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 89130844d1004..69bf96b39a945 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1397,7 +1397,7 @@ Numeric - Added ``log10`` to the list of supported functions in :meth:`DataFrame.eval` (:issue:`24139`) - Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) - Checking PEP 3141 numbers in :func:`~pandas.api.types.is_scalar` function returns ``True`` (:issue:`22903`) -- Reduction methods like :meth:`Series.sum` now accept the default value of ``keepdims=False`` when called from a NumPy ufunc, rather than raising a ``TypeError``. Full support for ``keepdims`` has not been implemented (:issue:``). +- Reduction methods like :meth:`Series.sum` now accept the default value of ``keepdims=False`` when called from a NumPy ufunc, rather than raising a ``TypeError``. Full support for ``keepdims`` has not been implemented (:issue:`24356`). Conversion ^^^^^^^^^^ From 82ce910983ec6ec48c8547bc06fafdb9055dff7b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 13:04:39 -0600 Subject: [PATCH 3/4] Use the method --- pandas/tests/series/test_analytics.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 6170c42c466c4..08472b3d352e5 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1664,7 +1664,9 @@ def test_validate_median_initial(self): s = pd.Series([1, 2]) with pytest.raises(ValueError, match="the 'overwrite_input' .* median"): - np.median(s, overwrite_input=True) + # It seems like np.median doesn't dispatch, so we use the + # method instead of the ufunc. + s.median(overwrite_input=True) def test_validate_stat_keepdims(self): s = pd.Series([1, 2]) From db29098039311bbced5dc52c29b847f4d6338a3e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 14:38:12 -0600 Subject: [PATCH 4/4] Skip for old NumPy --- pandas/tests/series/test_analytics.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 08472b3d352e5..0d8804dba83c1 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1646,6 +1646,7 @@ def test_value_counts_categorical_not_ordered(self): dict(keepdims=True), dict(out=object()), ]) + @td.skip_if_np_lt_115 def test_validate_any_all_out_keepdims_raises(self, kwargs, func): s = pd.Series([1, 2]) param = list(kwargs)[0] @@ -1655,6 +1656,7 @@ def test_validate_any_all_out_keepdims_raises(self, kwargs, func): with pytest.raises(ValueError, match=msg): func(s, **kwargs) + @td.skip_if_np_lt_115 def test_validate_sum_initial(self): s = pd.Series([1, 2]) with pytest.raises(ValueError, match="the 'initial' .* sum"): @@ -1668,6 +1670,7 @@ def test_validate_median_initial(self): # method instead of the ufunc. s.median(overwrite_input=True) + @td.skip_if_np_lt_115 def test_validate_stat_keepdims(self): s = pd.Series([1, 2]) with pytest.raises(ValueError,