Skip to content

Commit de9f3c1

Browse files
committed
BUG: fix sum over integer frames
1 parent 29a709c commit de9f3c1

File tree

4 files changed

+41
-6
lines changed

4 files changed

+41
-6
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ pandas 0.13
5454
representation of the index (:issue:`4136`)
5555
- Fix running of stata IO tests. Now uses temporary files to write
5656
(:issue:`4353`)
57+
- Fixed an issue where ``DataFrame.sum`` was slower than ``DataFrame.mean``
58+
for integer-valued frames (:issue:`4365`)
5759

5860
pandas 0.12
5961
===========

doc/source/v0.13.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ Bug Fixes
3333
- Fix running of stata IO tests. Now uses temporary files to write
3434
(:issue:`4353`)
3535

36+
- Fixed an issue where ``DataFrame.sum`` was slower than ``DataFrame.mean``
37+
for integer-valued frames (:issue:`4365`)
38+
3639
See the :ref:`full release notes
3740
<release>` or issue tracker
3841
on GitHub for a complete list.

pandas/core/nanops.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@ def f(values, axis=None, skipna=True, **kwds):
7171

7272
if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype):
7373
result = bn_func(values, axis=axis, **kwds)
74-
# prefer to treat inf/-inf as NA
74+
75+
# prefer to treat inf/-inf as NA, but must compute the func
76+
# twice :(
7577
if _has_infs(result):
7678
result = alt(values, axis=axis, skipna=skipna, **kwds)
7779
else:
@@ -86,7 +88,8 @@ def f(values, axis=None, skipna=True, **kwds):
8688

8789
def _bn_ok_dtype(dt):
8890
# Bottleneck chokes on datetime64
89-
return dt != np.object_ and not issubclass(dt.type, (np.datetime64,np.timedelta64))
91+
time_types = np.datetime64, np.timedelta64
92+
return dt != np.object_ and not issubclass(dt.type, time_types)
9093

9194

9295
def _has_infs(result):
@@ -95,10 +98,8 @@ def _has_infs(result):
9598
return lib.has_infs_f8(result)
9699
elif result.dtype == 'f4':
97100
return lib.has_infs_f4(result)
98-
else: # pragma: no cover
99-
raise TypeError('Only suppose float32/64 here')
100-
else:
101-
return np.isinf(result) or np.isneginf(result)
101+
return False
102+
return np.isinf(result) or np.isneginf(result)
102103

103104
def _get_fill_value(dtype, fill_value=None, fill_value_typ=None):
104105
""" return the correct fill value for the dtype of the values """

vb_suite/stat_ops.py

+29
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,35 @@
4343
Benchmark("df[1].sum(level=[0, 1])", setup, repeat=1,
4444
start_date=datetime(2011, 11, 15))
4545

46+
sum_setup = common_setup + """
47+
df = DataFrame(np.random.randn(100000, 4))
48+
dfi = DataFrame(np.random.randint(1000, size=df.shape))
49+
"""
50+
51+
stat_ops_frame_sum_int_axis_0 = \
52+
Benchmark("dfi.sum()", sum_setup, start_date=datetime(2013, 7, 25))
53+
54+
stat_ops_frame_sum_float_axis_0 = \
55+
Benchmark("df.sum()", sum_setup, start_date=datetime(2013, 7, 25))
56+
57+
stat_ops_frame_mean_int_axis_0 = \
58+
Benchmark("dfi.mean()", sum_setup, start_date=datetime(2013, 7, 25))
59+
60+
stat_ops_frame_mean_float_axis_0 = \
61+
Benchmark("df.mean()", sum_setup, start_date=datetime(2013, 7, 25))
62+
63+
stat_ops_frame_sum_int_axis_1 = \
64+
Benchmark("dfi.sum(1)", sum_setup, start_date=datetime(2013, 7, 25))
65+
66+
stat_ops_frame_sum_float_axis_1 = \
67+
Benchmark("df.sum(1)", sum_setup, start_date=datetime(2013, 7, 25))
68+
69+
stat_ops_frame_mean_int_axis_1 = \
70+
Benchmark("dfi.mean(1)", sum_setup, start_date=datetime(2013, 7, 25))
71+
72+
stat_ops_frame_mean_float_axis_1 = \
73+
Benchmark("df.mean(1)", sum_setup, start_date=datetime(2013, 7, 25))
74+
4675
#----------------------------------------------------------------------
4776
# rank
4877

0 commit comments

Comments
 (0)