Skip to content

Commit 69ffca6

Browse files
committed
Fixed bug pandas-dev#9733 where stat functions returned a python scalar for empty series
1 parent d406273 commit 69ffca6

File tree

4 files changed

+61
-32
lines changed

4 files changed

+61
-32
lines changed

doc/source/whatsnew/v0.17.0.txt

+2
Original file line number | Diff line number | Diff line change
@@ -701,3 +701,5 @@ Bug Fixes
701701
- Bug in ``iloc`` allowing memory outside bounds of a Series to be accessed with negative integers (:issue:`10779`)
702702
- Bug in ``read_msgpack`` where encoding is not respected (:issue:`10580`)
703703
- Bug preventing access to the first index when using ``iloc`` with a list containing the appropriate negative integer (:issue:`10547`, :issue:`10779`)
704+
705+
- Bug in stat functions (``sum``, ``mean``, etc.) returning a Python scalar instead of a NumPy scalar for an empty ``Series`` (:issue:`9733`)

pandas/core/common.py

+7
Original file line number | Diff line number | Diff line change
@@ -2484,11 +2484,18 @@ def is_integer_dtype(arr_or_dtype):
24842484
return (issubclass(tipo, np.integer) and
24852485
not issubclass(tipo, (np.datetime64, np.timedelta64)))
24862486

2487+
24872488
def is_int64_dtype(arr_or_dtype):
24882489
tipo = _get_dtype_type(arr_or_dtype)
24892490
return issubclass(tipo, np.int64)
24902491

24912492

2493+
def is_unsigned_integer_dtype(arr_or_dtype):
2494+
tipo = _get_dtype_type(arr_or_dtype)
2495+
return (issubclass(tipo, np.unsignedinteger) and
2496+
not issubclass(tipo, (np.datetime64, np.timedelta64)))
2497+
2498+
24922499
def is_int_or_datetime_dtype(arr_or_dtype):
24932500
tipo = _get_dtype_type(arr_or_dtype)
24942501
return (issubclass(tipo, np.integer) or

pandas/core/nanops.py

+16 -25
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
is_float, is_integer, is_complex,
1919
is_float_dtype,
2020
is_complex_dtype, is_integer_dtype,
21+
is_unsigned_integer_dtype,
2122
is_bool_dtype, is_object_dtype,
2223
is_datetime64_dtype, is_timedelta64_dtype,
2324
is_datetime_or_timedelta_dtype, _get_dtype,
@@ -67,21 +68,7 @@ def f(values, axis=None, skipna=True, **kwds):
6768
if k not in kwds:
6869
kwds[k] = v
6970
try:
70-
if self.zero_value is not None and values.size == 0:
71-
if values.ndim == 1:
72-
73-
# wrap the 0's if needed
74-
if is_timedelta64_dtype(values):
75-
return lib.Timedelta(0)
76-
return 0
77-
else:
78-
result_shape = (values.shape[:axis] +
79-
values.shape[axis + 1:])
80-
result = np.empty(result_shape)
81-
result.fill(0)
82-
return result
83-
84-
if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype,
71+
if values.size != 0 and _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype,
8572
bn_name):
8673
result = bn_func(values, axis=axis, **kwds)
8774

@@ -187,7 +174,10 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
187174
# return a platform independent precision dtype
188175
dtype_max = dtype
189176
if is_integer_dtype(dtype) or is_bool_dtype(dtype):
190-
dtype_max = np.int64
177+
if is_unsigned_integer_dtype(dtype):
178+
dtype_max = np.uint64
179+
else:
180+
dtype_max = np.int64
191181
elif is_float_dtype(dtype):
192182
dtype_max = np.float64
193183

@@ -241,14 +231,14 @@ def nanall(values, axis=None, skipna=True):
241231

242232

243233
@disallow('M8')
244-
@bottleneck_switch(zero_value=0)
234+
@bottleneck_switch()
245235
def nansum(values, axis=None, skipna=True):
246236
values, mask, dtype, dtype_max = _get_values(values, skipna, 0)
247237
dtype_sum = dtype_max
248238
if is_float_dtype(dtype):
249239
dtype_sum = dtype
250240
the_sum = values.sum(axis, dtype=dtype_sum)
251-
the_sum = _maybe_null_out(the_sum, axis, mask)
241+
the_sum = _maybe_null_out(the_sum, axis, mask, False)
252242

253243
return _wrap_results(the_sum, dtype)
254244

@@ -414,7 +404,7 @@ def nanmin(values, axis=None, skipna=True):
414404
result = values.min(axis)
415405

416406
result = _wrap_results(result, dtype)
417-
return _maybe_null_out(result, axis, mask)
407+
return _maybe_null_out(result, axis, mask, True)
418408

419409

420410
@bottleneck_switch()
@@ -445,7 +435,7 @@ def nanmax(values, axis=None, skipna=True):
445435
result = values.max(axis)
446436

447437
result = _wrap_results(result, dtype)
448-
return _maybe_null_out(result, axis, mask)
438+
return _maybe_null_out(result, axis, mask, True)
449439

450440

451441
def nanargmax(values, axis=None, skipna=True):
@@ -554,7 +544,7 @@ def nanprod(values, axis=None, skipna=True):
554544
values = values.copy()
555545
values[mask] = 1
556546
result = values.prod(axis)
557-
return _maybe_null_out(result, axis, mask)
547+
return _maybe_null_out(result, axis, mask, False)
558548

559549

560550
def _maybe_arg_null_out(result, axis, mask, skipna):
@@ -588,9 +578,11 @@ def _get_counts(mask, axis, dtype=float):
588578
return np.array(count, dtype=dtype)
589579

590580

591-
def _maybe_null_out(result, axis, mask):
581+
def _maybe_null_out(result, axis, mask, null_on_empty):
592582
if axis is not None and getattr(result, 'ndim', False):
593583
null_mask = (mask.shape[axis] - mask.sum(axis)) == 0
584+
if not null_on_empty:
585+
null_mask = null_mask & (mask.shape[axis] > 0)
594586
if np.any(null_mask):
595587
if np.iscomplexobj(result):
596588
result = result.astype('c16')
@@ -599,9 +591,8 @@ def _maybe_null_out(result, axis, mask):
599591
result[null_mask] = np.nan
600592
else:
601593
null_mask = mask.size - mask.sum()
602-
if null_mask == 0:
603-
result = np.nan
604-
594+
if null_mask == 0 and (mask.size > 0 or null_on_empty):
595+
return np.nan
605596
return result
606597

607598

pandas/tests/test_series.py

+36 -7
Original file line number | Diff line number | Diff line change
@@ -3616,15 +3616,44 @@ def test_ops_consistency_on_empty(self):
36163616
# GH 7869
36173617
# consistency on empty
36183618

3619-
# float
3620-
result = Series(dtype=float).sum()
3621-
self.assertEqual(result,0)
3619+
# Test type of empty Series
36223620

3623-
result = Series(dtype=float).mean()
3624-
self.assertTrue(isnull(result))
3621+
ops = ['median', 'mean', 'sum', 'prod']
36253622

3626-
result = Series(dtype=float).median()
3627-
self.assertTrue(isnull(result))
3623+
# First test numpy types
3624+
# Just make sure that numpy and pandas have the same return type
3625+
for dtype in ['int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64', 'float16', 'float32',
3626+
'float64', 'complex64', 'complex128']:
3627+
s = Series(dtype=dtype)
3628+
for op in ops:
3629+
result = getattr(s, op)()
3630+
np_type = getattr(np, dtype)
3631+
reference = getattr(np, op)(np_type([]))
3632+
if np.isnan(reference):
3633+
self.assertTrue(np.isnan(result),
3634+
msg="%s on empty %s Series: expecting nan, got %s" % (op, dtype, str(result)))
3635+
else:
3636+
self.assertEqual(result.dtype, reference.dtype,
3637+
msg="%s on empty %s Series: returned type %s, expected %s" %
3638+
(op, dtype, str(result.dtype), str(reference.dtype)))
3639+
self.assertEqual(result, reference,
3640+
msg='%s on empty %s Series: expected %s but received %s' %
3641+
(op, dtype, str(reference), str(result)))
3642+
3643+
# Test str/unicode types
3644+
str_series = Series(dtype='str')
3645+
unicode_series = Series(dtype='unicode')
3646+
for op in ['median', 'mean', 'prod']:
3647+
# TODO: these operations should raise type errors
3648+
# self.assertRaises(TypeError, getattr(str_series, op)(),
3649+
# msg="%s on empty str Series should raise TypeError" % op)
3650+
# self.assertRaises(TypeError, getattr(unicode_series, op)(),
3651+
# msg="%s on empty unicode Series should raise TypeError" % op)
3652+
pass
3653+
3654+
# TODO: these operations should return empty strings
3655+
# self.assertEqual('', str_series.sum())
3656+
# self.assertEqual('', unicode_series.sum())
36283657

36293658
# timedelta64[ns]
36303659
result = Series(dtype='m8[ns]').sum()

0 commit comments

Comments
 (0)