Skip to content

Commit 9776868

Browse files
jamestran201-alt authored and jreback committed
#18058: improve DatetimeIndex.date performance (#18163)
1 parent c619a67 commit 9776868

File tree

5 files changed

+53
-13
lines changed

5 files changed

+53
-13
lines changed

asv_bench/benchmarks/timeseries.py

+8
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ def time_dti_factorize(self):
8989
def time_dti_tz_factorize(self):
9090
self.dti_tz.factorize()
9191

92+
def time_dti_time(self):
93+
self.rng.time
94+
9295
def time_timestamp_tzinfo_cons(self):
9396
self.rng5[0]
9497

@@ -107,6 +110,11 @@ def time_infer_freq_daily(self):
107110
def time_infer_freq_business(self):
108111
infer_freq(self.b_freq)
109112

113+
def time_to_date(self):
114+
self.rng.date
115+
116+
def time_to_pydatetime(self):
117+
self.rng.to_pydatetime()
110118

111119
class TimeDatetimeConverter(object):
112120
goal_time = 0.2

doc/source/whatsnew/v0.22.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ Performance Improvements
9191
- Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc. is now faster through vectorization of the underlying methods (:issue:`18092`)
9292
- The overridden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`)
9393
- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`)
94+
- Improved performance of :attr:`Series.dt.date` and :attr:`DatetimeIndex.date` (:issue:`18058`)
95+
-
9496

9597
.. _whatsnew_0220.docs:
9698

pandas/_libs/tslib.pyx

+40-9
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ cimport util
2323

2424
from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
2525
PyDateTime_IMPORT,
26-
timedelta, datetime)
26+
timedelta, datetime, date)
2727
# import datetime C API
2828
PyDateTime_IMPORT
2929
# this is our datetime.pxd
@@ -80,10 +80,37 @@ cdef inline object create_datetime_from_ts(
8080
return datetime(dts.year, dts.month, dts.day, dts.hour,
8181
dts.min, dts.sec, dts.us, tz)
8282

83+
cdef inline object create_date_from_ts(
84+
int64_t value, pandas_datetimestruct dts,
85+
object tz, object freq):
86+
""" convenience routine to construct a datetime.date from its parts """
87+
return date(dts.year, dts.month, dts.day)
8388

84-
def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False):
85-
# convert an i8 repr to an ndarray of datetimes or Timestamp (if box ==
86-
# True)
89+
90+
def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,
91+
box="datetime"):
92+
"""
93+
Convert an i8 repr to an ndarray of datetimes, date or Timestamp
94+
95+
Parameters
96+
----------
97+
arr : array of i8
98+
tz : str, default None
99+
convert to this timezone
100+
freq : str/Offset, default None
101+
freq to convert
102+
box : {'datetime', 'timestamp', 'date'}, default 'datetime'
103+
If datetime, convert to datetime.datetime
104+
If date, convert to datetime.date
105+
If timestamp, convert to pandas.Timestamp
106+
107+
Returns
108+
-------
109+
result : array of dtype specified by box
110+
"""
111+
112+
assert ((box == "datetime") or (box == "date") or (box == "timestamp")), \
113+
"box must be one of 'datetime', 'date' or 'timestamp'"
87114

88115
cdef:
89116
Py_ssize_t i, n = len(arr)
@@ -94,13 +121,17 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False):
94121
ndarray[object] result = np.empty(n, dtype=object)
95122
object (*func_create)(int64_t, pandas_datetimestruct, object, object)
96123

97-
if box and is_string_object(freq):
98-
from pandas.tseries.frequencies import to_offset
99-
freq = to_offset(freq)
124+
if box == "date":
125+
assert (tz is None), "tz should be None when converting to date"
100126

101-
if box:
127+
func_create = create_date_from_ts
128+
elif box == "timestamp":
102129
func_create = create_timestamp_from_ts
103-
else:
130+
131+
if is_string_object(freq):
132+
from pandas.tseries.frequencies import to_offset
133+
freq = to_offset(freq)
134+
elif box == "datetime":
104135
func_create = create_datetime_from_ts
105136

106137
if tz is not None:

pandas/core/dtypes/concat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ def convert_to_pydatetime(x, axis):
405405
else:
406406
shape = x.shape
407407
x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(),
408-
box=True)
408+
box="timestamp")
409409
x = x.reshape(shape)
410410

411411
elif x.dtype == _TD_DTYPE:

pandas/core/indexes/datetimes.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -1237,7 +1237,7 @@ def __iter__(self):
12371237
end_i = min((i + 1) * chunksize, length)
12381238
converted = libts.ints_to_pydatetime(data[start_i:end_i],
12391239
tz=self.tz, freq=self.freq,
1240-
box=True)
1240+
box="timestamp")
12411241
for v in converted:
12421242
yield v
12431243

@@ -1687,8 +1687,7 @@ def date(self):
16871687
Returns numpy array of python datetime.date objects (namely, the date
16881688
part of Timestamps without timezone information).
16891689
"""
1690-
return self._maybe_mask_results(libalgos.arrmap_object(
1691-
self.asobject.values, lambda x: x.date()))
1690+
return libts.ints_to_pydatetime(self.normalize().asi8, box="date")
16921691

16931692
def normalize(self):
16941693
"""

0 commit comments

Comments
 (0)