Skip to content

Commit f7aeaeb

Browse files
committed
Merge pull request #5815 from jreback/dropna_perf
PERF: perf issue with dropna on frame
2 parents 4bb199c + 2a8ccd4 commit f7aeaeb

File tree

3 files changed

+30
-1
lines changed

3 files changed

+30
-1
lines changed

doc/source/release.rst

+1
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@ Improvements to existing features
6767
- perf improvements in Series datetime/timedelta binary operations (:issue:`5801`)
6868
- `option_context` context manager now available as top-level API (:issue:`5752`)
6969
- df.info() view now displays dtype info per column (:issue:`5682`)
70+
- perf improvements in DataFrame ``count/dropna`` for ``axis=1`` (:issue:`5815`)
7071

7172
Bug Fixes
7273
~~~~~~~~~

pandas/core/frame.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -3952,7 +3952,7 @@ def count(self, axis=0, level=None, numeric_only=False):
39523952
counts = notnull(frame.values).sum(1)
39533953
result = Series(counts, index=frame._get_agg_axis(axis))
39543954
else:
3955-
result = DataFrame.apply(frame, Series.count, axis=axis)
3955+
result = notnull(frame).sum(axis=axis)
39563956

39573957
return result
39583958

vb_suite/frame_methods.py

+28
Original file line number | Diff line number | Diff line change
@@ -289,6 +289,33 @@ def f(K=100):
289289
frame_isnull = Benchmark('isnull(df)', setup,
290290
start_date=datetime(2012,1,1))
291291

292+
## dropna
293+
setup = common_setup + """
294+
data = np.random.randn(10000, 1000)
295+
df = DataFrame(data)
296+
df.ix[50:1000,20:50] = np.nan
297+
df.ix[2000:3000] = np.nan
298+
df.ix[:,60:70] = np.nan
299+
"""
300+
frame_dropna_axis0_any = Benchmark('df.dropna(how="any",axis=0)', setup,
301+
start_date=datetime(2012,1,1))
302+
frame_dropna_axis0_all = Benchmark('df.dropna(how="all",axis=0)', setup,
303+
start_date=datetime(2012,1,1))
304+
305+
setup = common_setup + """
306+
data = np.random.randn(10000, 1000)
307+
df = DataFrame(data)
308+
df.ix[50:1000,20:50] = np.nan
309+
df.ix[2000:3000] = np.nan
310+
df.ix[:,60:70] = np.nan
311+
"""
312+
frame_dropna_axis1_any = Benchmark('df.dropna(how="any",axis=1)', setup,
313+
start_date=datetime(2012,1,1))
314+
315+
frame_dropna_axis1_all = Benchmark('df.dropna(how="all",axis=1)', setup,
316+
start_date=datetime(2012,1,1))
317+
318+
292319
#----------------------------------------------------------------------
293320
# apply
294321

@@ -298,3 +325,4 @@ def f(K=100):
298325
"""
299326
frame_apply_user_func = Benchmark('df.apply(lambda x: np.corrcoef(x,s)[0,1])', setup,
300327
start_date=datetime(2012,1,1))
328+

0 commit comments

Comments
 (0)