Skip to content

#18058: improve DatetimeIndex.date performance #18163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Nov 22, 2017
8 changes: 8 additions & 0 deletions asv_bench/benchmarks/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ def time_dti_factorize(self):
def time_dti_tz_factorize(self):
self.dti_tz.factorize()

def time_dti_time(self):
self.rng.time

def time_timestamp_tzinfo_cons(self):
self.rng5[0]

Expand All @@ -107,6 +110,11 @@ def time_infer_freq_daily(self):
def time_infer_freq_business(self):
infer_freq(self.b_freq)

def time_to_date(self):
self.rng.date

def time_to_pydatetime(self):
self.rng.to_pydatetime()

class TimeDatetimeConverter(object):
goal_time = 0.2
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ Performance Improvements
- Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc. has been sped up through vectorization of underlying methods (:issue:`18092`)
- The overridden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`)
- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`)
- Improved performance of :attr:`Series.dt.date` and :attr:`DatetimeIndex.date` (:issue:`18058`)
-

.. _whatsnew_0220.docs:

Expand Down
49 changes: 40 additions & 9 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ cimport util

from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
PyDateTime_IMPORT,
timedelta, datetime)
timedelta, datetime, date)
# import datetime C API
PyDateTime_IMPORT
# this is our datetime.pxd
Expand Down Expand Up @@ -80,10 +80,37 @@ cdef inline object create_datetime_from_ts(
return datetime(dts.year, dts.month, dts.day, dts.hour,
dts.min, dts.sec, dts.us, tz)

cdef inline object create_date_from_ts(
int64_t value, pandas_datetimestruct dts,
object tz, object freq):
""" convenience routine to construct a datetime.date from its parts """
# Only the year/month/day fields of ``dts`` are read; ``value``, ``tz`` and
# ``freq`` are not used in this body.
# NOTE(review): the unused parameters appear to exist so this function has
# the same (int64_t, pandas_datetimestruct, object, object) signature as the
# ``func_create`` function pointer it is assigned to in ints_to_pydatetime
# -- confirm before removing them.
return date(dts.year, dts.month, dts.day)

def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False):
# convert an i8 repr to an ndarray of datetimes or Timestamp (if box ==
# True)

def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,
box="datetime"):
"""
Convert an i8 repr to an ndarray of datetimes, date or Timestamp
Parameters
----------
arr : array of i8
tz : str, default None
convert to this timezone
freq : str/Offset, default None
freq to convert
box : {'datetime', 'timestamp', 'date'}, default 'datetime'
If 'datetime', convert to datetime.datetime
If 'date', convert to datetime.date
If 'timestamp', convert to pandas.Timestamp
Returns
-------
result : array of dtype specified by box
"""

assert ((box == "datetime") or (box == "date") or (box == "timestamp")), \
"box must be one of 'datetime', 'date' or 'timestamp'"

cdef:
Py_ssize_t i, n = len(arr)
Expand All @@ -94,13 +121,17 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False):
ndarray[object] result = np.empty(n, dtype=object)
object (*func_create)(int64_t, pandas_datetimestruct, object, object)

if box and is_string_object(freq):
from pandas.tseries.frequencies import to_offset
freq = to_offset(freq)
if box == "date":
assert (tz is None), "tz should be None when converting to date"

if box:
func_create = create_date_from_ts
elif box == "timestamp":
func_create = create_timestamp_from_ts
else:

if is_string_object(freq):
from pandas.tseries.frequencies import to_offset
freq = to_offset(freq)
elif box == "datetime":
func_create = create_datetime_from_ts

if tz is not None:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ def convert_to_pydatetime(x, axis):
else:
shape = x.shape
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(),
box=True)
box="timestamp")
x = x.reshape(shape)

elif x.dtype == _TD_DTYPE:
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1237,7 +1237,7 @@ def __iter__(self):
end_i = min((i + 1) * chunksize, length)
converted = libts.ints_to_pydatetime(data[start_i:end_i],
tz=self.tz, freq=self.freq,
box=True)
box="timestamp")
for v in converted:
yield v

Expand Down Expand Up @@ -1687,8 +1687,7 @@ def date(self):
Returns numpy array of python datetime.date objects (namely, the date
part of Timestamps without timezone information).
"""
return self._maybe_mask_results(libalgos.arrmap_object(
self.asobject.values, lambda x: x.date()))
return libts.ints_to_pydatetime(self.normalize().asi8, box="date")

def normalize(self):
"""
Expand Down