From 842e6589f961881d9b9e0d1bc5b9716e62f70ae6 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Mon, 13 Nov 2017 23:14:50 -0500 Subject: [PATCH 1/3] Improve DatetimeIndex.time performance --- asv_bench/benchmarks/timeseries.py | 2 +- doc/source/whatsnew/v0.22.0.txt | 1 + pandas/_libs/tslib.pyx | 20 +++++++++++++++----- pandas/core/indexes/datetimes.py | 4 +--- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index b3996739e33f7..fe282df25e9c5 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -85,7 +85,7 @@ def time_dti_tz_factorize(self): self.dti_tz.factorize() def time_dti_time(self): - self.rng.time + self.dst_rng.time def time_timestamp_tzinfo_cons(self): self.rng5[0] diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 32b548e5f32f1..9878814c2cc17 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -231,6 +231,7 @@ Performance Improvements - Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`) - Improved performance of :func:`IntervalIndex.symmetric_difference()` (:issue:`18475`) - Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`) +- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time` .. _whatsnew_0220.docs: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 293e10d1934fa..fa020958a846a 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -15,7 +15,7 @@ from util cimport (is_integer_object, is_float_object, is_string_object, from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, PyDateTime_CheckExact, PyDateTime_IMPORT, - timedelta, datetime, date) + timedelta, datetime, date, time) # import datetime C API PyDateTime_IMPORT @@ -70,11 +70,17 @@ cdef inline object create_date_from_ts( """ convenience routine to construct a datetime.date from its parts """ return date(dts.year, dts.month, dts.day) +cdef inline object create_time_from_ts( + int64_t value, pandas_datetimestruct dts, + object tz, object freq): + """ convenience routine to construct a datetime.time from its parts """ + return time(dts.hour, dts.min, dts.sec, dts.us, tz) + def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box="datetime"): """ - Convert an i8 repr to an ndarray of datetimes, date or Timestamp + Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp Parameters ---------- @@ -83,9 +89,10 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, convert to this timezone freq : str/Offset, default None freq to convert - box : {'datetime', 'timestamp', 'date'}, default 'datetime' + box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' If datetime, convert to datetime.datetime If date, convert to datetime.date + If time, convert to datetime.time If Timestamp, convert to pandas.Timestamp Returns @@ -93,8 +100,9 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, result : array of dtype specified by box """ - assert ((box == "datetime") or (box == "date") or (box == "timestamp")), \ - "box must be one of 'datetime', 'date' or 'timestamp'" + assert ((box == "datetime") or (box == "date") or (box == "timestamp") + or (box == "time")), \ + "box must be one of 'datetime', 'date', 'time' or 'timestamp'" cdef: Py_ssize_t i, n = len(arr) @@ -115,6 +123,8 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, if is_string_object(freq): from pandas.tseries.frequencies import to_offset freq = to_offset(freq) + elif box == "time": + func_create = create_time_from_ts elif box == "datetime": func_create = create_datetime_from_ts diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 17b3a88cbf544..b793ab85a6b44 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1677,9 +1677,7 @@ def time(self): """ Returns numpy array of datetime.time. The time part of the Timestamps. """ - return self._maybe_mask_results(libalgos.arrmap_object( - self.astype(object).values, - lambda x: np.nan if x is libts.NaT else x.time())) + return libts.ints_to_pydatetime(self.asi8, self.tz, box="time") @property def date(self): From a132c6df4f5235da5c6ae1677d0dbad59f3798f4 Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Sat, 9 Dec 2017 18:26:02 -0500 Subject: [PATCH 2/3] raise ValueError instead of using assertion in ints_to_pydatetime --- pandas/_libs/tslib.pyx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index fa020958a846a..7b0504388be22 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -100,10 +100,6 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, result : array of dtype specified by box """ - assert ((box == "datetime") or (box == "date") or (box == "timestamp") - or (box == "time")), \ - "box must be one of 'datetime', 'date', 'time' or 'timestamp'" - cdef: Py_ssize_t i, n = len(arr) ndarray[int64_t] trans, deltas @@ -127,6 +123,9 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, func_create = create_time_from_ts elif box == "datetime": func_create = create_datetime_from_ts + else: + raise ValueError("box must be one of 'datetime', 'date', 'time' or" + + " 'timestamp'") if tz is not None: if is_utc(tz): From 9734ae2b3f6154348300a3f906e066aae42b2fec Mon Sep 17 00:00:00 2001 From: tmnhat2001 Date: Sat, 9 Dec 2017 19:37:43 -0500 Subject: [PATCH 3/3] Remove unused import --- pandas/core/indexes/datetimes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index b793ab85a6b44..290c77dd7f040 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -53,8 +53,7 @@ import pandas.core.tools.datetimes as tools from pandas._libs import (lib, index as libindex, tslib as libts, - algos as libalgos, join as libjoin, - Timestamp) + join as libjoin, Timestamp) from pandas._libs.tslibs import (timezones, conversion, fields, parsing, period as libperiod)