From 9fa305873c4294f6479d52e170eea0cb5b9f55d1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 29 Oct 2019 15:20:44 -0700 Subject: [PATCH 1/3] REF: implement benchmarks/tslibs --- asv_bench/benchmarks/tslibs/__init__.py | 7 +++++++ asv_bench/benchmarks/{ => tslibs}/timestamp.py | 0 2 files changed, 7 insertions(+) create mode 100644 asv_bench/benchmarks/tslibs/__init__.py rename asv_bench/benchmarks/{ => tslibs}/timestamp.py (100%) diff --git a/asv_bench/benchmarks/tslibs/__init__.py b/asv_bench/benchmarks/tslibs/__init__.py new file mode 100644 index 0000000000000..815cf55b291cd --- /dev/null +++ b/asv_bench/benchmarks/tslibs/__init__.py @@ -0,0 +1,7 @@ +""" +Benchmarks in this directory should depend only on tslibs, tseries.offsets, +and to_offset. + +i.e. any code changes that do not touch those files should not need to +run these benchmarks. +""" diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py similarity index 100% rename from asv_bench/benchmarks/timestamp.py rename to asv_bench/benchmarks/tslibs/timestamp.py From 7a41d41a147e111c155f8c676c6170f4074c45c8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 29 Oct 2019 15:30:17 -0700 Subject: [PATCH 2/3] REF: tslibs benchmarks for Timedelta, Period --- asv_bench/benchmarks/period.py | 67 ++--------------------- asv_bench/benchmarks/timedelta.py | 66 ++-------------------- asv_bench/benchmarks/tslibs/period.py | 70 ++++++++++++++++++++++++ asv_bench/benchmarks/tslibs/timedelta.py | 61 +++++++++++++++++++++ 4 files changed, 140 insertions(+), 124 deletions(-) create mode 100644 asv_bench/benchmarks/tslibs/period.py create mode 100644 asv_bench/benchmarks/tslibs/timedelta.py diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index 7303240a25f29..b52aa2e55af35 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -1,71 +1,12 @@ +""" +Period benchmarks with non-tslibs dependencies. 
See +benchmarks.tslibs.period for benchmarks that rely only on tslibs. +""" from pandas import DataFrame, Period, PeriodIndex, Series, date_range, period_range from pandas.tseries.frequencies import to_offset -class PeriodProperties: - - params = ( - ["M", "min"], - [ - "year", - "month", - "day", - "hour", - "minute", - "second", - "is_leap_year", - "quarter", - "qyear", - "week", - "daysinmonth", - "dayofweek", - "dayofyear", - "start_time", - "end_time", - ], - ) - param_names = ["freq", "attr"] - - def setup(self, freq, attr): - self.per = Period("2012-06-01", freq=freq) - - def time_property(self, freq, attr): - getattr(self.per, attr) - - -class PeriodUnaryMethods: - - params = ["M", "min"] - param_names = ["freq"] - - def setup(self, freq): - self.per = Period("2012-06-01", freq=freq) - - def time_to_timestamp(self, freq): - self.per.to_timestamp() - - def time_now(self, freq): - self.per.now(freq) - - def time_asfreq(self, freq): - self.per.asfreq("A") - - -class PeriodConstructor: - params = [["D"], [True, False]] - param_names = ["freq", "is_offset"] - - def setup(self, freq, is_offset): - if is_offset: - self.freq = to_offset(freq) - else: - self.freq = freq - - def time_period_constructor(self, freq, is_offset): - Period("2012-06-01", freq=freq) - - class PeriodIndexConstructor: params = [["D"], [True, False]] diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 36a9db529f98f..828134b80aa3d 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -1,49 +1,11 @@ -import datetime +""" +Timedelta benchmarks with non-tslibs dependencies. See +benchmarks.tslibs.timedelta for benchmarks that rely only on tslibs. 
+""" import numpy as np -from pandas import ( - DataFrame, - Series, - Timedelta, - Timestamp, - timedelta_range, - to_timedelta, -) - - -class TimedeltaConstructor: - def time_from_int(self): - Timedelta(123456789) - - def time_from_unit(self): - Timedelta(1, unit="d") - - def time_from_components(self): - Timedelta( - days=1, - hours=2, - minutes=3, - seconds=4, - milliseconds=5, - microseconds=6, - nanoseconds=7, - ) - - def time_from_datetime_timedelta(self): - Timedelta(datetime.timedelta(days=1, seconds=1)) - - def time_from_np_timedelta(self): - Timedelta(np.timedelta64(1, "ms")) - - def time_from_string(self): - Timedelta("1 days") - - def time_from_iso_format(self): - Timedelta("P4DT12H30M5S") - - def time_from_missing(self): - Timedelta("nat") +from pandas import DataFrame, Series, Timestamp, timedelta_range, to_timedelta class ToTimedelta: @@ -88,24 +50,6 @@ def time_add_td_ts(self): self.td + self.ts -class TimedeltaProperties: - def setup_cache(self): - td = Timedelta(days=365, minutes=35, seconds=25, milliseconds=35) - return td - - def time_timedelta_days(self, td): - td.days - - def time_timedelta_seconds(self, td): - td.seconds - - def time_timedelta_microseconds(self, td): - td.microseconds - - def time_timedelta_nanoseconds(self, td): - td.nanoseconds - - class DatetimeAccessor: def setup_cache(self): N = 100000 diff --git a/asv_bench/benchmarks/tslibs/period.py b/asv_bench/benchmarks/tslibs/period.py new file mode 100644 index 0000000000000..9156c4aa90ea0 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/period.py @@ -0,0 +1,70 @@ +""" +Period benchmarks that rely only on tslibs. See benchmarks.period for +Period benchmarks that rely on other parts of pandas. 
+""" +from pandas import Period + +from pandas.tseries.frequencies import to_offset + + +class PeriodProperties: + + params = ( + ["M", "min"], + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "is_leap_year", + "quarter", + "qyear", + "week", + "daysinmonth", + "dayofweek", + "dayofyear", + "start_time", + "end_time", + ], + ) + param_names = ["freq", "attr"] + + def setup(self, freq, attr): + self.per = Period("2012-06-01", freq=freq) + + def time_property(self, freq, attr): + getattr(self.per, attr) + + +class PeriodUnaryMethods: + + params = ["M", "min"] + param_names = ["freq"] + + def setup(self, freq): + self.per = Period("2012-06-01", freq=freq) + + def time_to_timestamp(self, freq): + self.per.to_timestamp() + + def time_now(self, freq): + self.per.now(freq) + + def time_asfreq(self, freq): + self.per.asfreq("A") + + +class PeriodConstructor: + params = [["D"], [True, False]] + param_names = ["freq", "is_offset"] + + def setup(self, freq, is_offset): + if is_offset: + self.freq = to_offset(freq) + else: + self.freq = freq + + def time_period_constructor(self, freq, is_offset): + Period("2012-06-01", freq=freq) diff --git a/asv_bench/benchmarks/tslibs/timedelta.py b/asv_bench/benchmarks/tslibs/timedelta.py new file mode 100644 index 0000000000000..8a16ddc189483 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/timedelta.py @@ -0,0 +1,61 @@ +""" +Timedelta benchmarks that rely only on tslibs. See benchmarks.timedelta for +Timedelta benchmarks that rely on other parts of pandas. 
+""" +import datetime + +import numpy as np + +from pandas import Timedelta + + +class TimedeltaConstructor: + def time_from_int(self): + Timedelta(123456789) + + def time_from_unit(self): + Timedelta(1, unit="d") + + def time_from_components(self): + Timedelta( + days=1, + hours=2, + minutes=3, + seconds=4, + milliseconds=5, + microseconds=6, + nanoseconds=7, + ) + + def time_from_datetime_timedelta(self): + Timedelta(datetime.timedelta(days=1, seconds=1)) + + def time_from_np_timedelta(self): + Timedelta(np.timedelta64(1, "ms")) + + def time_from_string(self): + Timedelta("1 days") + + def time_from_iso_format(self): + Timedelta("P4DT12H30M5S") + + def time_from_missing(self): + Timedelta("nat") + + +class TimedeltaProperties: + def setup_cache(self): + td = Timedelta(days=365, minutes=35, seconds=25, milliseconds=35) + return td + + def time_timedelta_days(self, td): + td.days + + def time_timedelta_seconds(self, td): + td.seconds + + def time_timedelta_microseconds(self, td): + td.microseconds + + def time_timedelta_nanoseconds(self, td): + td.nanoseconds From 4bb4b48a728b681a3256a633105f7ccfecc94885 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 29 Oct 2019 16:24:53 -0700 Subject: [PATCH 3/3] REF: tslibs offsets benchmarks --- asv_bench/benchmarks/offset.py | 49 -------------- asv_bench/benchmarks/tslibs/offsets.py | 90 ++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 49 deletions(-) create mode 100644 asv_bench/benchmarks/tslibs/offsets.py diff --git a/asv_bench/benchmarks/offset.py b/asv_bench/benchmarks/offset.py index d822646e712ae..a960f43f46acd 100644 --- a/asv_bench/benchmarks/offset.py +++ b/asv_bench/benchmarks/offset.py @@ -1,8 +1,5 @@ -from datetime import datetime import warnings -import numpy as np - import pandas as pd try: @@ -54,24 +51,6 @@ def time_apply_index(self, offset): offset.apply_index(self.rng) -class OnOffset: - - params = offsets - param_names = ["offset"] - - def setup(self, offset): - self.dates = [ - 
datetime(2016, m, d) - for m in [10, 11, 12] - for d in [1, 2, 3, 28, 29, 30, 31] - if not (m == 11 and d == 31) - ] - - def time_on_offset(self, offset): - for date in self.dates: - offset.onOffset(date) - - class OffsetSeriesArithmetic: params = offsets @@ -99,31 +78,3 @@ def setup(self, offset): def time_add_offset(self, offset): with warnings.catch_warnings(record=True): self.data + offset - - -class OffestDatetimeArithmetic: - - params = offsets - param_names = ["offset"] - - def setup(self, offset): - self.date = datetime(2011, 1, 1) - self.dt64 = np.datetime64("2011-01-01 09:00Z") - - def time_apply(self, offset): - offset.apply(self.date) - - def time_apply_np_dt64(self, offset): - offset.apply(self.dt64) - - def time_add(self, offset): - self.date + offset - - def time_add_10(self, offset): - self.date + (10 * offset) - - def time_subtract(self, offset): - self.date - offset - - def time_subtract_10(self, offset): - self.date - (10 * offset) diff --git a/asv_bench/benchmarks/tslibs/offsets.py b/asv_bench/benchmarks/tslibs/offsets.py new file mode 100644 index 0000000000000..d6379b922641c --- /dev/null +++ b/asv_bench/benchmarks/tslibs/offsets.py @@ -0,0 +1,90 @@ +""" +offsets benchmarks that rely only on tslibs. See benchmarks.offset for +offsets benchmarks that rely on other parts of pandas. 
+""" +from datetime import datetime + +import numpy as np + +from pandas import offsets + +try: + import pandas.tseries.holiday # noqa +except ImportError: + pass + +hcal = pandas.tseries.holiday.USFederalHolidayCalendar() +# These offsets currently raise a NotImplementedError with .apply_index() +non_apply = [ + offsets.Day(), + offsets.BYearEnd(), + offsets.BYearBegin(), + offsets.BQuarterEnd(), + offsets.BQuarterBegin(), + offsets.BMonthEnd(), + offsets.BMonthBegin(), + offsets.CustomBusinessDay(), + offsets.CustomBusinessDay(calendar=hcal), + offsets.CustomBusinessMonthBegin(calendar=hcal), + offsets.CustomBusinessMonthEnd(calendar=hcal), + offsets.CustomBusinessMonthEnd(calendar=hcal), +] +other_offsets = [ + offsets.YearEnd(), + offsets.YearBegin(), + offsets.QuarterEnd(), + offsets.QuarterBegin(), + offsets.MonthEnd(), + offsets.MonthBegin(), + offsets.DateOffset(months=2, days=2), + offsets.BusinessDay(), + offsets.SemiMonthEnd(), + offsets.SemiMonthBegin(), +] +offset_objs = non_apply + other_offsets + + +class OnOffset: + + params = offset_objs + param_names = ["offset"] + + def setup(self, offset): + self.dates = [ + datetime(2016, m, d) + for m in [10, 11, 12] + for d in [1, 2, 3, 28, 29, 30, 31] + if not (m == 11 and d == 31) + ] + + def time_on_offset(self, offset): + for date in self.dates: + offset.onOffset(date) + + +class OffestDatetimeArithmetic: + + params = offset_objs + param_names = ["offset"] + + def setup(self, offset): + self.date = datetime(2011, 1, 1) + self.dt64 = np.datetime64("2011-01-01 09:00Z") + + def time_apply(self, offset): + offset.apply(self.date) + + def time_apply_np_dt64(self, offset): + offset.apply(self.dt64) + + def time_add(self, offset): + self.date + offset + + def time_add_10(self, offset): + self.date + (10 * offset) + + def time_subtract(self, offset): + self.date - offset + + def time_subtract_10(self, offset): + self.date - (10 * offset)