Skip to content

Commit c84ed78

Browse files
committed
WIP: fixing bug pandas-dev#9733, where stat functions returned a Python scalar for an empty Series
1 parent a796032 commit c84ed78

File tree

3 files changed

+42
-31
lines changed

3 files changed

+42
-31
lines changed

pandas/core/common.py

+5
Original file line number | Diff line number | Diff line change
@@ -2452,6 +2452,11 @@ def is_integer_dtype(arr_or_dtype):
24522452
return (issubclass(tipo, np.integer) and
24532453
not issubclass(tipo, (np.datetime64, np.timedelta64)))
24542454

2455+
def is_unsigned_integer_dtype(arr_or_dtype):
2456+
tipo = _get_dtype_type(arr_or_dtype)
2457+
return (issubclass(tipo, np.unsignedinteger) and
2458+
not issubclass(tipo, (np.datetime64, np.timedelta64)))
2459+
24552460

24562461
def is_int_or_datetime_dtype(arr_or_dtype):
24572462
tipo = _get_dtype_type(arr_or_dtype)

pandas/core/nanops.py

+10-18
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
is_float, is_integer, is_complex,
2222
is_float_dtype, is_floating_dtype,
2323
is_complex_dtype, is_integer_dtype,
24+
is_unsigned_integer_dtype,
2425
is_bool_dtype, is_object_dtype,
2526
is_datetime64_dtype, is_timedelta64_dtype,
2627
is_datetime_or_timedelta_dtype,
@@ -70,21 +71,7 @@ def f(values, axis=None, skipna=True, **kwds):
7071
if k not in kwds:
7172
kwds[k] = v
7273
try:
73-
if self.zero_value is not None and values.size == 0:
74-
if values.ndim == 1:
75-
76-
# wrap the 0's if needed
77-
if is_timedelta64_dtype(values):
78-
return lib.Timedelta(0)
79-
return values.dtype.type(0)
80-
else:
81-
result_shape = (values.shape[:axis] +
82-
values.shape[axis + 1:])
83-
result = np.empty(result_shape)
84-
result.fill(0)
85-
return result
86-
87-
if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype,
74+
if values.size != 0 and _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype,
8875
bn_name):
8976
result = bn_func(values, axis=axis, **kwds)
9077

@@ -190,7 +177,10 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
190177
# return a platform independent precision dtype
191178
dtype_max = dtype
192179
if is_integer_dtype(dtype) or is_bool_dtype(dtype):
193-
dtype_max = np.int64
180+
if is_unsigned_integer_dtype(dtype):
181+
dtype_max = np.uint64
182+
else:
183+
dtype_max = np.int64
194184
elif is_float_dtype(dtype):
195185
dtype_max = np.float64
196186

@@ -244,10 +234,10 @@ def nanall(values, axis=None, skipna=True):
244234

245235

246236
@disallow('M8')
247-
@bottleneck_switch(zero_value=0)
237+
@bottleneck_switch()
248238
def nansum(values, axis=None, skipna=True):
249239
values, mask, dtype, dtype_max = _get_values(values, skipna, 0)
250-
the_sum = values.sum(axis, dtype=dtype_max)
240+
the_sum = values.sum(axis)
251241
the_sum = _maybe_null_out(the_sum, axis, mask)
252242

253243
return _wrap_results(the_sum, dtype)
@@ -571,6 +561,8 @@ def _get_counts(mask, axis):
571561

572562

573563
def _maybe_null_out(result, axis, mask):
564+
if mask.size == 0:
565+
return result
574566
if axis is not None and getattr(result, 'ndim', False):
575567
null_mask = (mask.shape[axis] - mask.sum(axis)) == 0
576568
if np.any(null_mask):

pandas/tests/test_series.py

+27-13
Original file line number | Diff line number | Diff line change
@@ -2592,12 +2592,6 @@ def testit():
25922592
self.assertRaisesRegexp(NotImplementedError, name, f,
25932593
self.series, numeric_only=True)
25942594

2595-
# Test type of empty Series
2596-
s = Series()
2597-
self.assertEqual(s.dtype, s.sum().dtype)
2598-
s = Series(dtype=np.int64)
2599-
self.assertEqual(s.dtype, s.sum().dtype)
2600-
26012595
testit()
26022596

26032597
try:
@@ -3399,15 +3393,35 @@ def test_ops_consistency_on_empty(self):
33993393
# GH 7869
34003394
# consistency on empty
34013395

3402-
# float
3403-
result = Series(dtype=float).sum()
3404-
self.assertEqual(result,0)
3396+
# Test type of empty Series
34053397

3406-
result = Series(dtype=float).mean()
3407-
self.assertTrue(isnull(result))
3398+
ops = ['median', 'mean', 'sum', 'prod']
34083399

3409-
result = Series(dtype=float).median()
3410-
self.assertTrue(isnull(result))
3400+
# First test numpy types
3401+
for dtype in ['int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64', 'float16', 'float32',
3402+
'float64', 'complex64', 'complex128']:
3403+
s = Series(dtype=dtype)
3404+
for op in ops:
3405+
result = getattr(s, op)()
3406+
np_type = getattr(np, dtype)
3407+
reference = getattr(np, op)(np_type([]))
3408+
if np.isnan(reference):
3409+
self.assertTrue(np.isnan(result), msg="Expecting nan, got %s" % (str(result)))
3410+
else:
3411+
self.assertEqual(result.dtype, reference.dtype, msg="Failed to %s on %s: returned type %s, expected %s" % (op, dtype, str(result.dtype), str(reference.dtype)))
3412+
self.assertEqual(result, reference,
3413+
msg='Different result for empty %s with dtype=%s: expected %s but received %s' %
3414+
(op, dtype, str(reference), str(result)))
3415+
3416+
# Test str/unicode types
3417+
str_series = Series(dtype='str')
3418+
unicode_series = Series(dtype='unicode')
3419+
for op in ['median', 'mean', 'prod']:
3420+
print op
3421+
self.assertTrue(np.isnan(getattr(str_series, op)()))
3422+
self.assertTrue(np.isnan(getattr(unicode_series, op)()))
3423+
self.assertEqual('', str_series.sum())
3424+
self.assertEqual('', unicode_series.sum())
34113425

34123426
# timedelta64[ns]
34133427
result = Series(dtype='m8[ns]').sum()

0 commit comments

Comments
 (0)