Skip to content

Commit d9a09ca

Browse files
authored
PERF: Timestamp.normalize (#35068)
* PERF: Timestamp.normalize
* lint fixup
1 parent 559189a commit d9a09ca

File tree

4 files changed

+39
-25
lines changed

4 files changed

+39
-25
lines changed

asv_bench/benchmarks/tslibs/timestamp.py

+18-7
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,29 @@
1-
import datetime
1+
from datetime import datetime, timedelta, timezone
22

3-
import dateutil
3+
from dateutil.tz import gettz, tzlocal, tzutc
44
import numpy as np
55
import pytz
66

77
from pandas import Timestamp
88

9+
# One case for each type of tzinfo object that has its own code path
10+
# in tzconversion code.
11+
_tzs = [
12+
None,
13+
pytz.timezone("Europe/Amsterdam"),
14+
gettz("US/Central"),
15+
pytz.UTC,
16+
tzutc(),
17+
timezone(timedelta(minutes=60)),
18+
tzlocal(),
19+
]
20+
921

1022
class TimestampConstruction:
1123
def setup(self):
1224
self.npdatetime64 = np.datetime64("2020-01-01 00:00:00")
13-
self.dttime_unaware = datetime.datetime(2020, 1, 1, 0, 0, 0)
14-
self.dttime_aware = datetime.datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC)
25+
self.dttime_unaware = datetime(2020, 1, 1, 0, 0, 0)
26+
self.dttime_aware = datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC)
1527
self.ts = Timestamp("2020-01-01 00:00:00")
1628

1729
def time_parse_iso8601_no_tz(self):
@@ -49,7 +61,6 @@ def time_from_pd_timestamp(self):
4961

5062

5163
class TimestampProperties:
52-
_tzs = [None, pytz.timezone("Europe/Amsterdam"), pytz.UTC, dateutil.tz.tzutc()]
5364
_freqs = [None, "B"]
5465
params = [_tzs, _freqs]
5566
param_names = ["tz", "freq"]
@@ -110,7 +121,7 @@ def time_weekday_name(self, tz, freq):
110121

111122

112123
class TimestampOps:
113-
params = [None, "US/Eastern", pytz.UTC, dateutil.tz.tzutc()]
124+
params = _tzs
114125
param_names = ["tz"]
115126

116127
def setup(self, tz):
@@ -148,7 +159,7 @@ def time_ceil(self, tz):
148159

149160
class TimestampAcrossDst:
150161
def setup(self):
151-
dt = datetime.datetime(2016, 3, 27, 1)
162+
dt = datetime(2016, 3, 27, 1)
152163
self.tzinfo = pytz.timezone("CET").localize(dt, is_dst=False).tzinfo
153164
self.ts2 = Timestamp(dt)
154165

pandas/_libs/tslibs/conversion.pxd

+1
Original file line number | Diff line number | Diff line change
@@ -26,3 +26,4 @@ cpdef datetime localize_pydatetime(datetime dt, object tz)
2626
cdef int64_t cast_from_unit(object ts, str unit) except? -1
2727

2828
cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo tz)
29+
cdef int64_t normalize_i8_stamp(int64_t local_val) nogil

pandas/_libs/tslibs/conversion.pyx

+5-5
Original file line number | Diff line number | Diff line change
@@ -795,14 +795,14 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
795795
result[i] = NPY_NAT
796796
continue
797797
local_val = stamps[i]
798-
result[i] = _normalize_i8_stamp(local_val)
798+
result[i] = normalize_i8_stamp(local_val)
799799
elif is_tzlocal(tz):
800800
for i in range(n):
801801
if stamps[i] == NPY_NAT:
802802
result[i] = NPY_NAT
803803
continue
804804
local_val = tz_convert_utc_to_tzlocal(stamps[i], tz)
805-
result[i] = _normalize_i8_stamp(local_val)
805+
result[i] = normalize_i8_stamp(local_val)
806806
else:
807807
# Adjust datetime64 timestamp, recompute datetimestruct
808808
trans, deltas, typ = get_dst_info(tz)
@@ -815,21 +815,21 @@ cpdef ndarray[int64_t] normalize_i8_timestamps(const int64_t[:] stamps, tzinfo t
815815
result[i] = NPY_NAT
816816
continue
817817
local_val = stamps[i] + delta
818-
result[i] = _normalize_i8_stamp(local_val)
818+
result[i] = normalize_i8_stamp(local_val)
819819
else:
820820
pos = trans.searchsorted(stamps, side='right') - 1
821821
for i in range(n):
822822
if stamps[i] == NPY_NAT:
823823
result[i] = NPY_NAT
824824
continue
825825
local_val = stamps[i] + deltas[pos[i]]
826-
result[i] = _normalize_i8_stamp(local_val)
826+
result[i] = normalize_i8_stamp(local_val)
827827

828828
return result.base # `.base` to access underlying ndarray
829829

830830

831831
@cython.cdivision
832-
cdef inline int64_t _normalize_i8_stamp(int64_t local_val) nogil:
832+
cdef inline int64_t normalize_i8_stamp(int64_t local_val) nogil:
833833
"""
834834
Round the localized nanosecond timestamp down to the previous midnight.
835835

pandas/_libs/tslibs/timestamps.pyx

+15-13
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ from pandas._libs.tslibs.conversion cimport (
4040
_TSObject,
4141
convert_to_tsobject,
4242
convert_datetime_to_tsobject,
43-
normalize_i8_timestamps,
43+
normalize_i8_stamp,
4444
)
4545
from pandas._libs.tslibs.fields import get_start_end_field, get_date_name_field
4646
from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT
@@ -553,6 +553,20 @@ cdef class _Timestamp(ABCTimestamp):
553553
"""
554554
return ccalendar.get_days_in_month(self.year, self.month)
555555

556+
# -----------------------------------------------------------------
557+
# Transformation Methods
558+
559+
def normalize(self) -> "Timestamp":
560+
"""
561+
Normalize Timestamp to midnight, preserving tz information.
562+
"""
563+
cdef:
564+
local_val = self._maybe_convert_value_to_local()
565+
int64_t normalized
566+
567+
normalized = normalize_i8_stamp(local_val)
568+
return Timestamp(normalized).tz_localize(self.tzinfo)
569+
556570
# -----------------------------------------------------------------
557571
# Pickle Methods
558572

@@ -1455,18 +1469,6 @@ default 'raise'
14551469
self.nanosecond / 3600.0 / 1e+9
14561470
) / 24.0)
14571471

1458-
def normalize(self):
1459-
"""
1460-
Normalize Timestamp to midnight, preserving tz information.
1461-
"""
1462-
cdef:
1463-
ndarray[int64_t] normalized
1464-
tzinfo own_tz = self.tzinfo # could be None
1465-
1466-
normalized = normalize_i8_timestamps(
1467-
np.array([self.value], dtype="i8"), tz=own_tz)
1468-
return Timestamp(normalized[0]).tz_localize(own_tz)
1469-
14701472

14711473
# Aliases
14721474
Timestamp.weekofyear = Timestamp.week

0 commit comments

Comments
 (0)