Skip to content

Commit 6dbcc83

Browse files
committed
Merge pull request #3731 from cpcloud/raise-on-datetime-ufuncs-3726
API: raise TypeError on most datetime64 reduction ops
2 parents 7a219e7 + 1b94cfb commit 6dbcc83

File tree

6 files changed

+142
-64
lines changed

6 files changed

+142
-64
lines changed

RELEASE.rst

+7
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,12 @@ pandas 0.11.1
9797
in your calls.
9898
- Do not allow astypes on ``datetime64[ns]`` except to ``object``, and
9999
``timedelta64[ns]`` to ``object/int`` (GH3425_)
100+
- The behavior of ``datetime64`` dtypes has changed with respect to certain
101+
so-called reduction operations (GH3726_). The following operations now
102+
raise a ``TypeError`` when performed on a ``Series`` and return an *empty*
103+
``Series`` when performed on a ``DataFrame`` similar to performing these
104+
operations on, for example, a ``DataFrame`` of ``slice`` objects:
105+
- sum, prod, mean, std, var, skew, kurt, corr, and cov
100106
- Do not allow datetimelike/timedeltalike creation except with valid types
101107
(e.g. cannot pass ``datetime64[ms]``) (GH3423_)
102108
- Add ``squeeze`` keyword to ``groupby`` to allow reduction from
@@ -294,6 +300,7 @@ pandas 0.11.1
294300
.. _GH3748: https://github.com/pydata/pandas/issues/3748
295301
.. _GH3741: https://github.com/pydata/pandas/issues/3741
296302
.. _GH3750: https://github.com/pydata/pandas/issues/3750
303+
.. _GH3726: https://github.com/pydata/pandas/issues/3726
297304

298305
pandas 0.11.0
299306
=============

doc/source/v0.11.1.txt

+13
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,17 @@ API changes
128128
- ``DataFrame.to_html`` and ``DataFrame.to_latex`` now accept a path for
129129
their first argument (GH3702_)
130130

131+
- Do not allow astypes on ``datetime64[ns]`` except to ``object``, and
132+
``timedelta64[ns]`` to ``object/int`` (GH3425_)
133+
134+
- The behavior of ``datetime64`` dtypes has changed with respect to certain
135+
so-called reduction operations (GH3726_). The following operations now
136+
raise a ``TypeError`` when performed on a ``Series`` and return an *empty*
137+
``Series`` when performed on a ``DataFrame`` similar to performing these
138+
operations on, for example, a ``DataFrame`` of ``slice`` objects:
139+
140+
- sum, prod, mean, std, var, skew, kurt, corr, and cov
141+
131142
Enhancements
132143
~~~~~~~~~~~~
133144

@@ -345,3 +356,5 @@ on GitHub for a complete list.
345356
.. _GH3696: https://github.com/pydata/pandas/issues/3696
346357
.. _GH3667: https://github.com/pydata/pandas/issues/3667
347358
.. _GH3741: https://github.com/pydata/pandas/issues/3741
359+
.. _GH3726: https://github.com/pydata/pandas/issues/3726
360+
.. _GH3425: https://github.com/pydata/pandas/issues/3425

pandas/core/nanops.py

+101-48
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import sys
2+
import itertools
3+
import functools
24

35
import numpy as np
46

57
from pandas.core.common import isnull, notnull
68
import pandas.core.common as com
7-
import pandas.core.config as cf
89
import pandas.lib as lib
910
import pandas.algos as algos
1011
import pandas.hashtable as _hash
@@ -17,41 +18,70 @@
1718
_USE_BOTTLENECK = False
1819

1920

20-
def _bottleneck_switch(bn_name, alt, zero_value=None, **kwargs):
21-
try:
22-
bn_func = getattr(bn, bn_name)
23-
except (AttributeError, NameError): # pragma: no cover
24-
bn_func = None
21+
class disallow(object):
22+
def __init__(self, *dtypes):
23+
super(disallow, self).__init__()
24+
self.dtypes = tuple(np.dtype(dtype).type for dtype in dtypes)
25+
26+
def check(self, obj):
27+
return hasattr(obj, 'dtype') and issubclass(obj.dtype.type,
28+
self.dtypes)
29+
30+
def __call__(self, f):
31+
@functools.wraps(f)
32+
def _f(*args, **kwargs):
33+
obj_iter = itertools.chain(args, kwargs.itervalues())
34+
if any(self.check(obj) for obj in obj_iter):
35+
raise TypeError('reduction operation {0!r} not allowed for '
36+
'this dtype'.format(f.__name__.replace('nan',
37+
'')))
38+
return f(*args, **kwargs)
39+
return _f
40+
41+
42+
class bottleneck_switch(object):
43+
def __init__(self, zero_value=None, **kwargs):
44+
self.zero_value = zero_value
45+
self.kwargs = kwargs
46+
47+
def __call__(self, alt):
48+
bn_name = alt.__name__
2549

26-
def f(values, axis=None, skipna=True, **kwds):
27-
if len(kwargs) > 0:
28-
for k, v in kwargs.iteritems():
29-
if k not in kwds:
30-
kwds[k] = v
3150
try:
32-
if zero_value is not None and values.size == 0:
33-
if values.ndim == 1:
34-
return 0
51+
bn_func = getattr(bn, bn_name)
52+
except (AttributeError, NameError): # pragma: no cover
53+
bn_func = None
54+
55+
@functools.wraps(alt)
56+
def f(values, axis=None, skipna=True, **kwds):
57+
if len(self.kwargs) > 0:
58+
for k, v in self.kwargs.iteritems():
59+
if k not in kwds:
60+
kwds[k] = v
61+
try:
62+
if self.zero_value is not None and values.size == 0:
63+
if values.ndim == 1:
64+
return 0
65+
else:
66+
result_shape = (values.shape[:axis] +
67+
values.shape[axis + 1:])
68+
result = np.empty(result_shape)
69+
result.fill(0)
70+
return result
71+
72+
if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype):
73+
result = bn_func(values, axis=axis, **kwds)
74+
# prefer to treat inf/-inf as NA
75+
if _has_infs(result):
76+
result = alt(values, axis=axis, skipna=skipna, **kwds)
3577
else:
36-
result_shape = values.shape[:
37-
axis] + values.shape[axis + 1:]
38-
result = np.empty(result_shape)
39-
result.fill(0)
40-
return result
41-
42-
if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype):
43-
result = bn_func(values, axis=axis, **kwds)
44-
# prefer to treat inf/-inf as NA
45-
if _has_infs(result):
4678
result = alt(values, axis=axis, skipna=skipna, **kwds)
47-
else:
79+
except Exception:
4880
result = alt(values, axis=axis, skipna=skipna, **kwds)
49-
except Exception:
50-
result = alt(values, axis=axis, skipna=skipna, **kwds)
5181

52-
return result
82+
return result
5383

54-
return f
84+
return f
5585

5686

5787
def _bn_ok_dtype(dt):
@@ -166,13 +196,17 @@ def nanall(values, axis=None, skipna=True):
166196
values, mask, dtype = _get_values(values, skipna, True, copy=skipna)
167197
return values.all(axis)
168198

169-
def _nansum(values, axis=None, skipna=True):
199+
@disallow('M8')
200+
@bottleneck_switch(zero_value=0)
201+
def nansum(values, axis=None, skipna=True):
170202
values, mask, dtype = _get_values(values, skipna, 0)
171203
the_sum = values.sum(axis)
172204
the_sum = _maybe_null_out(the_sum, axis, mask)
173205
return the_sum
174206

175-
def _nanmean(values, axis=None, skipna=True):
207+
@disallow('M8')
208+
@bottleneck_switch()
209+
def nanmean(values, axis=None, skipna=True):
176210
values, mask, dtype = _get_values(values, skipna, 0)
177211
the_sum = _ensure_numeric(values.sum(axis))
178212
count = _get_counts(mask, axis)
@@ -186,8 +220,9 @@ def _nanmean(values, axis=None, skipna=True):
186220
the_mean = the_sum / count if count > 0 else np.nan
187221
return the_mean
188222

189-
190-
def _nanmedian(values, axis=None, skipna=True):
223+
@disallow('M8')
224+
@bottleneck_switch()
225+
def nanmedian(values, axis=None, skipna=True):
191226
def get_median(x):
192227
mask = notnull(x)
193228
if not skipna and not mask.all():
@@ -197,13 +232,31 @@ def get_median(x):
197232
if values.dtype != np.float64:
198233
values = values.astype('f8')
199234

200-
if values.ndim > 1:
201-
return np.apply_along_axis(get_median, axis, values)
202-
else:
203-
return get_median(values)
235+
notempty = values.size
204236

205-
206-
def _nanvar(values, axis=None, skipna=True, ddof=1):
237+
# an array from a frame
238+
if values.ndim > 1:
239+
# there's a non-empty array to apply over otherwise numpy raises
240+
if notempty:
241+
return np.apply_along_axis(get_median, axis, values)
242+
243+
# must return the correct shape, but median is not defined for the
244+
# empty set so return nans of shape "everything but the passed axis"
245+
# since "axis" is where the reduction would occur if we had a nonempty
246+
# array
247+
shp = np.array(values.shape)
248+
dims = np.arange(values.ndim)
249+
ret = np.empty(shp[dims != axis])
250+
ret.fill(np.nan)
251+
return ret
252+
253+
# otherwise return a scalar value
254+
return get_median(values) if notempty else np.nan
255+
256+
257+
@disallow('M8')
258+
@bottleneck_switch(ddof=1)
259+
def nanvar(values, axis=None, skipna=True, ddof=1):
207260
if not isinstance(values.dtype.type, np.floating):
208261
values = values.astype('f8')
209262

@@ -223,7 +276,8 @@ def _nanvar(values, axis=None, skipna=True, ddof=1):
223276
return np.fabs((XX - X ** 2 / count) / (count - ddof))
224277

225278

226-
def _nanmin(values, axis=None, skipna=True):
279+
@bottleneck_switch()
280+
def nanmin(values, axis=None, skipna=True):
227281
values, mask, dtype = _get_values(values, skipna, fill_value_typ = '+inf')
228282

229283
# numpy 1.6.1 workaround in Python 3.x
@@ -247,7 +301,8 @@ def _nanmin(values, axis=None, skipna=True):
247301
return _maybe_null_out(result, axis, mask)
248302

249303

250-
def _nanmax(values, axis=None, skipna=True):
304+
@bottleneck_switch()
305+
def nanmax(values, axis=None, skipna=True):
251306
values, mask, dtype = _get_values(values, skipna, fill_value_typ ='-inf')
252307

253308
# numpy 1.6.1 workaround in Python 3.x
@@ -291,14 +346,8 @@ def nanargmin(values, axis=None, skipna=True):
291346
result = _maybe_arg_null_out(result, axis, mask, skipna)
292347
return result
293348

294-
nansum = _bottleneck_switch('nansum', _nansum, zero_value=0)
295-
nanmean = _bottleneck_switch('nanmean', _nanmean)
296-
nanmedian = _bottleneck_switch('nanmedian', _nanmedian)
297-
nanvar = _bottleneck_switch('nanvar', _nanvar, ddof=1)
298-
nanmin = _bottleneck_switch('nanmin', _nanmin)
299-
nanmax = _bottleneck_switch('nanmax', _nanmax)
300-
301349

350+
@disallow('M8')
302351
def nanskew(values, axis=None, skipna=True):
303352
if not isinstance(values.dtype.type, np.floating):
304353
values = values.astype('f8')
@@ -332,6 +381,7 @@ def nanskew(values, axis=None, skipna=True):
332381
return result
333382

334383

384+
@disallow('M8')
335385
def nankurt(values, axis=None, skipna=True):
336386
if not isinstance(values.dtype.type, np.floating):
337387
values = values.astype('f8')
@@ -365,6 +415,7 @@ def nankurt(values, axis=None, skipna=True):
365415
return result
366416

367417

418+
@disallow('M8')
368419
def nanprod(values, axis=None, skipna=True):
369420
mask = isnull(values)
370421
if skipna and not issubclass(values.dtype.type, np.integer):
@@ -423,6 +474,7 @@ def _zero_out_fperr(arg):
423474
return 0 if np.abs(arg) < 1e-14 else arg
424475

425476

477+
@disallow('M8')
426478
def nancorr(a, b, method='pearson', min_periods=None):
427479
"""
428480
a, b: ndarrays
@@ -469,6 +521,7 @@ def _spearman(a, b):
469521
return _cor_methods[method]
470522

471523

524+
@disallow('M8')
472525
def nancov(a, b, min_periods=None):
473526
if len(a) != len(b):
474527
raise AssertionError('Operands to nancov must have same size')

pandas/core/series.py

+6-12
Original file line numberDiff line numberDiff line change
@@ -97,21 +97,15 @@ def convert_to_array(values):
9797
values = np.array([values])
9898
inferred_type = lib.infer_dtype(values)
9999
if inferred_type in set(['datetime64','datetime','date','time']):
100-
if isinstance(values, pa.Array) and com.is_datetime64_dtype(values):
101-
pass
102-
else:
100+
if not (isinstance(values, pa.Array) and com.is_datetime64_dtype(values)):
103101
values = tslib.array_to_datetime(values)
104102
elif inferred_type in set(['timedelta','timedelta64']):
105103
# need to convert timedelta to ns here
106104
# safest to convert it to an object arrany to process
107-
if isinstance(values, pa.Array) and com.is_timedelta64_dtype(values):
108-
pass
109-
else:
105+
if not (isinstance(values, pa.Array) and com.is_timedelta64_dtype(values)):
110106
values = com._possibly_cast_to_timedelta(values)
111107
elif inferred_type in set(['integer']):
112-
if values.dtype == 'timedelta64[ns]':
113-
pass
114-
elif values.dtype.kind == 'm':
108+
if values.dtype.kind == 'm':
115109
values = values.astype('timedelta64[ns]')
116110
else:
117111
values = pa.array(values)
@@ -125,9 +119,9 @@ def convert_to_array(values):
125119
is_datetime_rhs = com.is_datetime64_dtype(rvalues)
126120

127121
# 2 datetimes or 2 timedeltas
128-
if (is_timedelta_lhs and is_timedelta_rhs) or (is_datetime_lhs and is_datetime_rhs):
129-
130-
if is_datetime_lhs and name not in ['__sub__']:
122+
if (is_timedelta_lhs and is_timedelta_rhs) or (is_datetime_lhs and
123+
is_datetime_rhs):
124+
if is_datetime_lhs and name != '__sub__':
131125
raise TypeError("can only operate on a datetimes for subtraction, "
132126
"but the operator [%s] was passed" % name)
133127
elif is_timedelta_lhs and name not in ['__add__','__sub__']:

pandas/tests/test_frame.py

+9
Original file line numberDiff line numberDiff line change
@@ -9167,6 +9167,15 @@ def _check_stat_op(self, name, alternative, frame=None, has_skipna=True,
91679167

91689168
f = getattr(frame, name)
91699169

9170+
if not ('max' in name or 'min' in name or 'count' in name):
9171+
df = DataFrame({'b': date_range('1/1/2001', periods=2)})
9172+
_f = getattr(df, name)
9173+
print df
9174+
self.assertFalse(len(_f()))
9175+
9176+
df['a'] = range(len(df))
9177+
self.assert_(len(getattr(df, name)()))
9178+
91709179
if has_skipna:
91719180
def skipna_wrapper(x):
91729181
nona = x.dropna().values

pandas/tests/test_series.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -1460,10 +1460,6 @@ def test_sum_inf(self):
14601460
with cf.option_context("mode.use_inf_as_null", True):
14611461
assert_almost_equal(s.sum(), s2.sum())
14621462

1463-
res = nanops.nansum(arr, axis=1)
1464-
expected = nanops._nansum(arr, axis=1)
1465-
assert_almost_equal(res, expected)
1466-
14671463
res = nanops.nansum(arr, axis=1)
14681464
self.assertTrue(np.isinf(res).all())
14691465

@@ -1594,6 +1590,12 @@ def testit():
15941590
# add some NaNs
15951591
self.series[5:15] = np.NaN
15961592

1593+
1594+
# idxmax, idxmin, min, and max are valid for dates
1595+
if not ('max' in name or 'min' in name):
1596+
ds = Series(date_range('1/1/2001', periods=10))
1597+
self.assertRaises(TypeError, f, ds)
1598+
15971599
# skipna or no
15981600
self.assert_(notnull(f(self.series)))
15991601
self.assert_(isnull(f(self.series, skipna=False)))

0 commit comments

Comments
 (0)