From f36cc17d40d01fae13287f7ccc9088a0a9465bbb Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 25 Oct 2013 08:16:42 -0400 Subject: [PATCH] PERF: vbench for time-series index assignment in frame (GH5320) PERF: direct index assignment in a frame was doing lots of work --- pandas/core/frame.py | 6 +++++- vb_suite/frame_methods.py | 15 +++++++++++++++ vb_suite/timeseries.py | 4 ++-- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5e33532587506..eb98bcfa00ecb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1914,7 +1914,11 @@ def insert(self, loc, column, value, allow_duplicates=False): def _sanitize_column(self, key, value): # Need to make sure new columns (which go into the BlockManager as new # blocks) are always copied - if _is_sequence(value): + + # don't need further processing on an equal index + if isinstance(value, Index) and (not len(self.index) or value.equals(self.index)): + value = value.values.copy() + elif isinstance(value, Series) or _is_sequence(value): is_frame = isinstance(value, DataFrame) if isinstance(value, Series) or is_frame: if value.index.equals(self.index) or not len(self.index): diff --git a/vb_suite/frame_methods.py b/vb_suite/frame_methods.py index 67c0aa227f886..3567ee2b09f99 100644 --- a/vb_suite/frame_methods.py +++ b/vb_suite/frame_methods.py @@ -120,6 +120,21 @@ def j(): frame_getitem_single_column2 = Benchmark('j()', setup, start_date=datetime(2010, 6, 1)) +#---------------------------------------------------------------------- +# assignment + +setup = common_setup + """ +idx = date_range('1/1/2000', periods=100000, freq='D') +df = DataFrame(randn(100000, 1),columns=['A'],index=idx) +def f(x): + x = x.copy() + x['date'] = x.index +""" + +frame_assign_timeseries_index = Benchmark('f(df)', setup, + start_date=datetime(2013, 10, 1)) + + #---------------------------------------------------------------------- # to_string diff --git a/vb_suite/timeseries.py 
b/vb_suite/timeseries.py index a990a9873cea0..0850499f42480 100644 --- a/vb_suite/timeseries.py +++ b/vb_suite/timeseries.py @@ -229,12 +229,12 @@ def date_range(start=None, end=None, periods=None, freq=None): # tz_localize with infer argument. This is an attempt to emulate the results # of read_csv with duplicated data. Not passing infer_dst will fail setup = common_setup + """ -dst_rng = date_range('10/29/2000 1:00:00', +dst_rng = date_range('10/29/2000 1:00:00', '10/29/2000 1:59:59', freq='S') index = date_range('10/29/2000', '10/29/2000 00:59:59', freq='S') index = index.append(dst_rng) index = index.append(dst_rng) -index = index.append(date_range('10/29/2000 2:00:00', +index = index.append(date_range('10/29/2000 2:00:00', '10/29/2000 3:00:00', freq='S')) """