Skip to content

Commit eb4762c

Browse files
committed
COMPAT: Expand compatibility with fromnumeric.py
Expands compatibility with fromnumeric.py in tslib.pyx and adds checks in window.py, groupby.py, and resample.py to ensure that pandas functions such as 'mean' are not called via the NumPy library. Closes gh-12811.
1 parent 070e877 commit eb4762c

File tree

12 files changed

+316
-49
lines changed

12 files changed

+316
-49
lines changed

doc/source/whatsnew/v0.18.2.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ API changes
4646

4747

4848
- Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`)
49-
49+
- Compatibility with NumPy array methods has been expanded to timestamps (:issue:`12811`)
50+
- An ``UnsupportedFunctionCall`` error is now raised if groupby or resample functions like ``mean`` are called via NumPy (:issue:`12811`)
5051

5152
.. _whatsnew_0182.api.tolist:
5253

pandas/compat/numpy/function.py

+75-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from numpy import ndarray
2222
from pandas.util.validators import (validate_args, validate_kwargs,
2323
validate_args_and_kwargs)
24-
from pandas.core.common import is_integer
24+
from pandas.core.common import is_integer, UnsupportedFunctionCall
2525
from pandas.compat import OrderedDict
2626

2727

@@ -245,3 +245,77 @@ def validate_transpose_for_generic(inst, kwargs):
245245
msg += " for {klass} instances".format(klass=klass)
246246

247247
raise ValueError(msg)
248+
249+
250+
def validate_window_func(name, args, kwargs):
    """
    Reject NumPy-style invocations of a window method.

    Parameters
    ----------
    name : str
        Method name interpolated into the error message.
    args : tuple
        Extra positional arguments the method received; must be empty.
    kwargs : dict
        Extra keyword arguments the method received; must not contain
        'axis', 'dtype' or 'out'.

    Raises
    ------
    UnsupportedFunctionCall
        If `args` is non-empty, or `kwargs` contains any of 'axis',
        'dtype' or 'out'.
    """
    forbidden_keys = ('axis', 'dtype', 'out')
    msg = ("numpy operations are not valid with window objects. "
           "Use .{func}() directly instead ").format(func=name)

    # Positional extras are checked first, then the NumPy-only keywords.
    if args or any(key in kwargs for key in forbidden_keys):
        raise UnsupportedFunctionCall(msg)
262+
263+
264+
def validate_rolling_func(name, args, kwargs):
    """
    Reject NumPy-style invocations of a rolling-window method.

    Parameters
    ----------
    name : str
        Method name interpolated into the error message.
    args : tuple
        Extra positional arguments the method received; must be empty.
    kwargs : dict
        Extra keyword arguments the method received; must not contain
        'axis', 'dtype' or 'out'.

    Raises
    ------
    UnsupportedFunctionCall
        If `args` is non-empty, or `kwargs` contains any of 'axis',
        'dtype' or 'out'.
    """
    msg = ("numpy operations are not valid with window objects. "
           "Use .rolling(...).{func}() instead ").format(func=name)

    if args:
        raise UnsupportedFunctionCall(msg)

    # Any of these keywords is taken as the sign of a NumPy-style call.
    offending = [key for key in ('axis', 'dtype', 'out') if key in kwargs]
    if offending:
        raise UnsupportedFunctionCall(msg)
276+
277+
278+
def validate_expanding_func(name, args, kwargs):
    """
    Reject NumPy-style invocations of an expanding-window method.

    Parameters
    ----------
    name : str
        Method name interpolated into the error message.
    args : tuple
        Extra positional arguments the method received; must be empty.
    kwargs : dict
        Extra keyword arguments the method received; must not contain
        'axis', 'dtype' or 'out'.

    Raises
    ------
    UnsupportedFunctionCall
        If `args` is non-empty, or `kwargs` contains any of 'axis',
        'dtype' or 'out'.
    """
    template = ("numpy operations are not valid with window objects. "
                "Use .expanding(...).{func}() instead ")
    msg = template.format(func=name)

    if args:
        raise UnsupportedFunctionCall(msg)

    uses_numpy_kwarg = ('axis' in kwargs or
                        'dtype' in kwargs or
                        'out' in kwargs)
    if uses_numpy_kwarg:
        raise UnsupportedFunctionCall(msg)
290+
291+
292+
def validate_groupby_func(name, args, kwargs):
    """
    Ensure a groupby method received no extra arguments.

    Both `args` and `kwargs` are expected to be empty, since every
    parameter the method needs is listed explicitly in its signature.

    Parameters
    ----------
    name : str
        Method name interpolated into the error message.
    args : tuple
        Extra positional arguments the method received.
    kwargs : dict
        Extra keyword arguments the method received.

    Raises
    ------
    UnsupportedFunctionCall
        If either `args` or `kwargs` is non-empty.
    """
    if not args and not kwargs:
        return

    template = ("numpy operations are not valid with groupby. "
                "Use .groupby(...).{func}() instead")
    raise UnsupportedFunctionCall(template.format(func=name))
303+
304+
# Resampler methods that get the NumPy-specific error message when they
# are handed unexpected extra arguments.
RESAMPLER_NUMPY_OPS = (
    'min', 'max', 'sum', 'prod', 'mean', 'std', 'var',
)


def validate_resampler_func(method, args, kwargs):
    """
    Ensure a resampler method received no extra arguments.

    Both `args` and `kwargs` are expected to be empty, since every
    parameter the method needs is listed explicitly in its signature.

    Parameters
    ----------
    method : str
        Name of the resampler method being validated.
    args : tuple
        Extra positional arguments the method received.
    kwargs : dict
        Extra keyword arguments the method received.

    Raises
    ------
    UnsupportedFunctionCall
        If extra arguments were given and `method` is listed in
        RESAMPLER_NUMPY_OPS.
    TypeError
        If extra arguments were given and `method` is not listed in
        RESAMPLER_NUMPY_OPS.
    """
    if not args and not kwargs:
        return

    if method not in RESAMPLER_NUMPY_OPS:
        raise TypeError("too many arguments passed in")

    template = ("numpy operations are not valid with resample. "
                "Use .resample(...).{func}() instead")
    raise UnsupportedFunctionCall(template.format(func=method))

pandas/core/common.py

+4
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ class AmbiguousIndexError(PandasError, KeyError):
4141
pass
4242

4343

44+
class UnsupportedFunctionCall(ValueError):
    """
    Error for a function call that is not supported.

    Subclasses ValueError, so handlers that catch ValueError also catch
    this exception.
    """
46+
47+
4448
class AbstractMethodError(NotImplementedError):
4549
"""Raise this error instead of NotImplementedError for abstract methods
4650
while keeping compatibility with Python 2 and Python 3.

pandas/core/generic.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -5299,7 +5299,7 @@ def _make_stat_function(cls, name, name1, name2, axis_descr, desc, f):
52995299
@Appender(_num_doc)
53005300
def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None,
53015301
**kwargs):
5302-
nv.validate_stat_func(tuple(), kwargs)
5302+
nv.validate_stat_func(tuple(), kwargs, fname=name)
53035303
if skipna is None:
53045304
skipna = True
53055305
if axis is None:
@@ -5319,7 +5319,7 @@ def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f):
53195319
@Appender(_num_ddof_doc)
53205320
def stat_func(self, axis=None, skipna=None, level=None, ddof=1,
53215321
numeric_only=None, **kwargs):
5322-
nv.validate_stat_ddof_func(tuple(), kwargs)
5322+
nv.validate_stat_ddof_func(tuple(), kwargs, fname=name)
53235323
if skipna is None:
53245324
skipna = True
53255325
if axis is None:
@@ -5340,7 +5340,7 @@ def _make_cum_function(cls, name, name1, name2, axis_descr, desc, accum_func,
53405340
@Appender("Return cumulative {0} over requested axis.".format(name) +
53415341
_cnum_doc)
53425342
def cum_func(self, axis=None, dtype=None, out=None, skipna=True, **kwargs):
5343-
nv.validate_cum_func(tuple(), kwargs)
5343+
nv.validate_cum_func(tuple(), kwargs, fname=name)
53445344
if axis is None:
53455345
axis = self._stat_axis_number
53465346
else:
@@ -5374,7 +5374,7 @@ def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f):
53745374
@Appender(_bool_doc)
53755375
def logical_func(self, axis=None, bool_only=None, skipna=None, level=None,
53765376
**kwargs):
5377-
nv.validate_logical_func(tuple(), kwargs)
5377+
nv.validate_logical_func(tuple(), kwargs, fname=name)
53785378
if skipna is None:
53795379
skipna = True
53805380
if axis is None:

pandas/core/groupby.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
callable, map
1212
)
1313
from pandas import compat
14+
from pandas.compat.numpy import function as nv
1415
from pandas.compat.numpy import _np_version_under1p8
1516
from pandas.core.base import (PandasObject, SelectionMixin, GroupByError,
1617
DataError, SpecificationError)
@@ -954,12 +955,13 @@ def count(self):
954955

955956
@Substitution(name='groupby')
956957
@Appender(_doc_template)
957-
def mean(self):
958+
def mean(self, *args, **kwargs):
958959
"""
959960
Compute mean of groups, excluding missing values
960961
961962
For multiple groupings, the result index will be a MultiIndex
962963
"""
964+
nv.validate_groupby_func('mean', args, kwargs)
963965
try:
964966
return self._cython_agg_general('mean')
965967
except GroupByError:
@@ -993,7 +995,7 @@ def f(x):
993995

994996
@Substitution(name='groupby')
995997
@Appender(_doc_template)
996-
def std(self, ddof=1):
998+
def std(self, ddof=1, *args, **kwargs):
997999
"""
9981000
Compute standard deviation of groups, excluding missing values
9991001
@@ -1005,12 +1007,13 @@ def std(self, ddof=1):
10051007
degrees of freedom
10061008
"""
10071009

1008-
# todo, implement at cython level?
1010+
# TODO: implement at Cython level?
1011+
nv.validate_groupby_func('std', args, kwargs)
10091012
return np.sqrt(self.var(ddof=ddof))
10101013

10111014
@Substitution(name='groupby')
10121015
@Appender(_doc_template)
1013-
def var(self, ddof=1):
1016+
def var(self, ddof=1, *args, **kwargs):
10141017
"""
10151018
Compute variance of groups, excluding missing values
10161019
@@ -1021,7 +1024,7 @@ def var(self, ddof=1):
10211024
ddof : integer, default 1
10221025
degrees of freedom
10231026
"""
1024-
1027+
nv.validate_groupby_func('var', args, kwargs)
10251028
if ddof == 1:
10261029
return self._cython_agg_general('var')
10271030
else:
@@ -1317,17 +1320,19 @@ def cumcount(self, ascending=True):
13171320

13181321
@Substitution(name='groupby')
13191322
@Appender(_doc_template)
1320-
def cumprod(self, axis=0):
1323+
def cumprod(self, axis=0, *args, **kwargs):
    """Cumulative product for each group"""
    # Extra arguments are rejected up front, before any computation.
    nv.validate_groupby_func('cumprod', args, kwargs)

    needs_apply = axis != 0
    if needs_apply:
        # For a non-zero axis, call cumprod on each group through apply.
        return self.apply(lambda grp: grp.cumprod(axis=axis))

    return self._cython_transform('cumprod')
13261330

13271331
@Substitution(name='groupby')
13281332
@Appender(_doc_template)
1329-
def cumsum(self, axis=0):
1333+
def cumsum(self, axis=0, *args, **kwargs):
13301334
"""Cumulative sum for each group"""
1335+
nv.validate_groupby_func('cumsum', args, kwargs)
13311336
if axis != 0:
13321337
return self.apply(lambda x: x.cumprod(axis=axis))
13331338

0 commit comments

Comments
 (0)