From eedc7a047ce22c400b9f00114716de9413d5350c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 18 Dec 2017 21:15:10 -0800 Subject: [PATCH 1/2] CLN: ASV replace --- asv_bench/benchmarks/replace.py | 96 +++++++++++++++------------------ 1 file changed, 42 insertions(+), 54 deletions(-) diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 157d5fe1e3948..4b7dd0bff1305 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -1,70 +1,58 @@ -from .pandas_vb_common import * +import numpy as np +import pandas as pd +from .pandas_vb_common import setup # noqa -class replace_fillna(object): - goal_time = 0.2 - - def setup(self): - self.N = 1000000 - try: - self.rng = date_range('1/1/2000', periods=self.N, freq='min') - except NameError: - self.rng = DatetimeIndex('1/1/2000', periods=self.N, offset=datetools.Minute()) - self.date_range = DateRange - self.ts = Series(np.random.randn(self.N), index=self.rng) - def time_replace_fillna(self): - self.ts.fillna(0.0, inplace=True) +class NaN(object): - -class replace_large_dict(object): goal_time = 0.2 + params = [True, False] + param_names = ['inplace'] - def setup(self): - self.n = (10 ** 6) - self.start_value = (10 ** 5) - self.to_rep = {i: self.start_value + i for i in range(self.n)} - self.s = Series(np.random.randint(self.n, size=(10 ** 3))) - - def time_replace_large_dict(self): - self.s.replace(self.to_rep, inplace=True) + def setup(self, inplace): + N = 10**6 + rng = pd.date_range('1/1/2000', periods=N, freq='min') + data = np.random.randn(N) + data[::2] = np.nan + self.ts = pd.Series(data, index=rng) + def time_fillna(self, inplace): + self.ts.fillna(0.0, inplace=inplace) -class replace_convert(object): - goal_time = 0.5 + def time_replace(self, inplace): + self.ts.replace(np.nan, 0.0, inplace=inplace) - def setup(self): - self.n = (10 ** 3) - self.to_ts = {i: pd.Timestamp(i) for i in range(self.n)} - self.to_td = {i: pd.Timedelta(i) for i in range(self.n)} - self.s = Series(np.random.randint(self.n, size=(10 ** 3))) - self.df = DataFrame({'A': np.random.randint(self.n, size=(10 ** 3)), - 'B': np.random.randint(self.n, size=(10 ** 3))}) - def time_replace_series_timestamp(self): - self.s.replace(self.to_ts) +class ReplaceDict(object): - def time_replace_series_timedelta(self): - self.s.replace(self.to_td) + goal_time = 0.2 + params = [True, False] + param_names = ['inplace'] - def time_replace_frame_timestamp(self): - self.df.replace(self.to_ts) + def setup(self, inplace): + N = 10**5 + start_value = 10**5 + self.to_rep = dict(enumerate(np.arange(N) + start_value)) + self.s = pd.Series(np.random.randint(N, size=10**3)) - def time_replace_frame_timedelta(self): - self.df.replace(self.to_td) + def time_replace_series(self, inplace): + self.s.replace(self.to_rep, inplace=inplace) -class replace_replacena(object): - goal_time = 0.2 +class Convert(object): - def setup(self): - self.N = 1000000 - try: - self.rng = date_range('1/1/2000', periods=self.N, freq='min') - except NameError: - self.rng = DatetimeIndex('1/1/2000', periods=self.N, offset=datetools.Minute()) - self.date_range = DateRange - self.ts = Series(np.random.randn(self.N), index=self.rng) - - def time_replace_replacena(self): - self.ts.replace(np.nan, 0.0, inplace=True) + goal_time = 0.5 + params = (['DataFrame', 'Series'], ['Timestamp', 'Timedelta']) + param_names = ['contructor', 'replace_data'] + + def setup(self, contructor, replace_data): + N = 10**3 + data = {'Series': pd.Series(np.random.randint(N, size=N)), + 'DataFrame': pd.DataFrame({'A': np.random.randint(N, size=N), + 'B': np.random.randint(N, size=N)})} + self.to_replace = {i: getattr(pd, replace_data) for i in range(N)} + self.data = data[contructor] + + def time_replace(self, contructor, replace_data): + self.data.replace(self.to_replace) From 5b97df9fac9e42174a825e666158e5b8d9d1980e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 19 Dec 2017 23:21:36 -0800 Subject: [PATCH 2/2] class NaN --> class FillNa --- asv_bench/benchmarks/replace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 4b7dd0bff1305..6330a2b36c516 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -4,7 +4,7 @@ from .pandas_vb_common import setup # noqa -class NaN(object): +class FillNa(object): goal_time = 0.2 params = [True, False]