diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 9614a63332609..2ca2416f58b57 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -89,6 +89,9 @@ def time_dti_factorize(self): def time_dti_tz_factorize(self): self.dti_tz.factorize() + def time_dti_time(self): + self.rng.time + def time_timestamp_tzinfo_cons(self): self.rng5[0] @@ -107,6 +110,11 @@ def time_infer_freq_daily(self): def time_infer_freq_business(self): infer_freq(self.b_freq) + def time_to_date(self): + self.rng.date + + def time_to_pydatetime(self): + self.rng.to_pydatetime() class TimeDatetimeConverter(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 4f403ff8053a7..b845e84d433f7 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -91,6 +91,8 @@ Performance Improvements - Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc... sped up through vectorization of underlying methods (:issue:`18092`) - The overriden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`) - ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`) +- Improved performance of :attr:`Series.dt.date` and :attr:`DatetimeIndex.date` (:issue:`18058`) +- .. 
_whatsnew_0220.docs: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5e3eb1f00b18c..a119e22b8e3ee 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -23,7 +23,7 @@ cimport util from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, PyDateTime_IMPORT, - timedelta, datetime) + timedelta, datetime, date) # import datetime C API PyDateTime_IMPORT # this is our datetime.pxd @@ -80,10 +80,37 @@ cdef inline object create_datetime_from_ts( return datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) +cdef inline object create_date_from_ts( + int64_t value, pandas_datetimestruct dts, + object tz, object freq): + """ convenience routine to construct a datetime.date from its parts """ + return date(dts.year, dts.month, dts.day) -def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): - # convert an i8 repr to an ndarray of datetimes or Timestamp (if box == - # True) + +def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, + box="datetime"): + """ + Convert an i8 repr to an ndarray of datetime, date or Timestamp objects + + Parameters + ---------- + arr : array of i8 + tz : str, default None + convert to this timezone + freq : str/Offset, default None + freq to convert + box : {'datetime', 'timestamp', 'date'}, default 'datetime' + If datetime, convert to datetime.datetime + If date, convert to datetime.date + If timestamp, convert to pandas.Timestamp + + Returns + ------- + result : ndarray of object dtype, with elements of the type selected by box + """ + + assert ((box == "datetime") or (box == "date") or (box == "timestamp")), \ + "box must be one of 'datetime', 'date' or 'timestamp'" cdef: Py_ssize_t i, n = len(arr) @@ -94,13 +121,17 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): ndarray[object] result = np.empty(n, dtype=object) object (*func_create)(int64_t, pandas_datetimestruct, object, object) - if box and is_string_object(freq): - from pandas.tseries.frequencies import 
to_offset - freq = to_offset(freq) + if box == "date": + assert (tz is None), "tz should be None when converting to date" - if box: + func_create = create_date_from_ts + elif box == "timestamp": func_create = create_timestamp_from_ts - else: + + if is_string_object(freq): + from pandas.tseries.frequencies import to_offset + freq = to_offset(freq) + elif box == "datetime": func_create = create_datetime_from_ts if tz is not None: diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index fe306b51de8d0..7f9245bb31530 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -405,7 +405,7 @@ def convert_to_pydatetime(x, axis): else: shape = x.shape x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), - box=True) + box="timestamp") x = x.reshape(shape) elif x.dtype == _TD_DTYPE: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index a2ed2ff9bce5e..e08bf4a625bce 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1237,7 +1237,7 @@ def __iter__(self): end_i = min((i + 1) * chunksize, length) converted = libts.ints_to_pydatetime(data[start_i:end_i], tz=self.tz, freq=self.freq, - box=True) + box="timestamp") for v in converted: yield v @@ -1687,8 +1687,7 @@ def date(self): Returns numpy array of python datetime.date objects (namely, the date part of Timestamps without timezone information). """ - return self._maybe_mask_results(libalgos.arrmap_object( - self.asobject.values, lambda x: x.date())) + return libts.ints_to_pydatetime(self.normalize().asi8, box="date") def normalize(self): """