diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py
index 157d5fe1e3948..6330a2b36c516 100644
--- a/asv_bench/benchmarks/replace.py
+++ b/asv_bench/benchmarks/replace.py
@@ -1,70 +1,62 @@
-from .pandas_vb_common import *
+import numpy as np
+import pandas as pd
 
+from .pandas_vb_common import setup # noqa
 
-class replace_fillna(object):
-    goal_time = 0.2
-
-    def setup(self):
-        self.N = 1000000
-        try:
-            self.rng = date_range('1/1/2000', periods=self.N, freq='min')
-        except NameError:
-            self.rng = DatetimeIndex('1/1/2000', periods=self.N, offset=datetools.Minute())
-            self.date_range = DateRange
-        self.ts = Series(np.random.randn(self.N), index=self.rng)
 
-    def time_replace_fillna(self):
-        self.ts.fillna(0.0, inplace=True)
+class FillNa(object):
+    """Time fillna/replace of NaNs in a half-NaN, minute-indexed Series."""
 
-
-class replace_large_dict(object):
     goal_time = 0.2
+    params = [True, False]
+    param_names = ['inplace']
 
-    def setup(self):
-        self.n = (10 ** 6)
-        self.start_value = (10 ** 5)
-        self.to_rep = {i: self.start_value + i for i in range(self.n)}
-        self.s = Series(np.random.randint(self.n, size=(10 ** 3)))
-
-    def time_replace_large_dict(self):
-        self.s.replace(self.to_rep, inplace=True)
+    def setup(self, inplace):
+        N = 10**6
+        rng = pd.date_range('1/1/2000', periods=N, freq='min')
+        data = np.random.randn(N)
+        data[::2] = np.nan
+        self.ts = pd.Series(data, index=rng)
 
+    def time_fillna(self, inplace):
+        self.ts.fillna(0.0, inplace=inplace)
 
-class replace_convert(object):
-    goal_time = 0.5
+    def time_replace(self, inplace):
+        self.ts.replace(np.nan, 0.0, inplace=inplace)
 
-    def setup(self):
-        self.n = (10 ** 3)
-        self.to_ts = {i: pd.Timestamp(i) for i in range(self.n)}
-        self.to_td = {i: pd.Timedelta(i) for i in range(self.n)}
-        self.s = Series(np.random.randint(self.n, size=(10 ** 3)))
-        self.df = DataFrame({'A': np.random.randint(self.n, size=(10 ** 3)),
-                             'B': np.random.randint(self.n, size=(10 ** 3))})
 
-    def time_replace_series_timestamp(self):
-        self.s.replace(self.to_ts)
+class ReplaceDict(object):
+    """Time Series.replace with a 10**5-entry int-to-int mapping."""
 
-    def time_replace_series_timedelta(self):
-        self.s.replace(self.to_td)
+    goal_time = 0.2
+    params = [True, False]
+    param_names = ['inplace']
 
-    def time_replace_frame_timestamp(self):
-        self.df.replace(self.to_ts)
+    def setup(self, inplace):
+        N = 10**5
+        start_value = 10**5
+        self.to_rep = dict(enumerate(np.arange(N) + start_value))
+        self.s = pd.Series(np.random.randint(N, size=10**3))
 
-    def time_replace_frame_timedelta(self):
-        self.df.replace(self.to_td)
+    def time_replace_series(self, inplace):
+        self.s.replace(self.to_rep, inplace=inplace)
 
 
-class replace_replacena(object):
-    goal_time = 0.2
+class Convert(object):
+    """Time replacing ints by Timestamp/Timedelta in a Series/DataFrame."""
 
-    def setup(self):
-        self.N = 1000000
-        try:
-            self.rng = date_range('1/1/2000', periods=self.N, freq='min')
-        except NameError:
-            self.rng = DatetimeIndex('1/1/2000', periods=self.N, offset=datetools.Minute())
-            self.date_range = DateRange
-        self.ts = Series(np.random.randn(self.N), index=self.rng)
-
-    def time_replace_replacena(self):
-        self.ts.replace(np.nan, 0.0, inplace=True)
+    goal_time = 0.5
+    params = (['DataFrame', 'Series'], ['Timestamp', 'Timedelta'])
+    param_names = ['contructor', 'replace_data']
+
+    def setup(self, contructor, replace_data):
+        N = 10**3
+        data = {'Series': pd.Series(np.random.randint(N, size=N)),
+                'DataFrame': pd.DataFrame({'A': np.random.randint(N, size=N),
+                                           'B': np.random.randint(N, size=N)})}
+        # Call the constructor so each value is an instance, not the class.
+        self.to_replace = {i: getattr(pd, replace_data)(i) for i in range(N)}
+        self.data = data[contructor]
+
+    def time_replace(self, contructor, replace_data):
+        self.data.replace(self.to_replace)