diff --git a/doc/source/release.rst b/doc/source/release.rst
index c0e155372760f..0074d3b359cbe 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -67,6 +67,7 @@ Improvements to existing features
   - perf improvements in Series datetime/timedelta binary operations (:issue:`5801`)
   - `option_context` context manager now available as top-level API (:issue:`5752`)
   - df.info() view now display dtype info per column (:issue: `5682`)
+  - perf improvements in DataFrame ``count/dropna`` for ``axis=1``
 
 Bug Fixes
 ~~~~~~~~~
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 97c284fb75a43..36cfbb524ab31 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3952,7 +3952,8 @@ def count(self, axis=0, level=None, numeric_only=False):
                 counts = notnull(frame.values).sum(1)
                 result = Series(counts, index=frame._get_agg_axis(axis))
             else:
-                result = DataFrame.apply(frame, Series.count, axis=axis)
+                # sum one boolean not-null mask instead of applying Series.count
+                result = notnull(frame).sum(axis=axis)
 
         return result
 
diff --git a/vb_suite/frame_methods.py b/vb_suite/frame_methods.py
index ee4d876d20233..fd03d512125e7 100644
--- a/vb_suite/frame_methods.py
+++ b/vb_suite/frame_methods.py
@@ -289,6 +289,27 @@ def f(K=100):
 frame_isnull = Benchmark('isnull(df)', setup,
                          start_date=datetime(2012,1,1))
 
+## dropna
+# a single fixture is shared by all four dropna benchmarks; it holds a NaN
+# sub-block plus row and column slices that are set entirely to NaN
+setup = common_setup + """
+data = np.random.randn(10000, 1000)
+df = DataFrame(data)
+df.ix[50:1000,20:50] = np.nan
+df.ix[2000:3000] = np.nan
+df.ix[:,60:70] = np.nan
+"""
+frame_dropna_axis0_any = Benchmark('df.dropna(how="any",axis=0)', setup,
+                                   start_date=datetime(2012,1,1))
+frame_dropna_axis0_all = Benchmark('df.dropna(how="all",axis=0)', setup,
+                                   start_date=datetime(2012,1,1))
+
+frame_dropna_axis1_any = Benchmark('df.dropna(how="any",axis=1)', setup,
+                                   start_date=datetime(2012,1,1))
+frame_dropna_axis1_all = Benchmark('df.dropna(how="all",axis=1)', setup,
+                                   start_date=datetime(2012,1,1))
+
+
 #----------------------------------------------------------------------
 # apply
 