-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
PERF: Datetime/Timestamp.normalize for timezone naive datetimes #23634
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 14 commits
62827ef
ff171f7
efb281f
cc6eee0
b90abd9
35fac22
f09559a
0b3a664
9042aa1
3a23170
52a7eb2
6204d21
1f1d455
44a8808
e6c74d2
0ece208
bc5571a
243d73a
fb11dcf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,8 @@ from cpython.datetime cimport (datetime, tzinfo, | |
PyDateTime_CheckExact, PyDateTime_IMPORT) | ||
PyDateTime_IMPORT | ||
|
||
from ccalendar import DAY_SECONDS | ||
|
||
from np_datetime cimport (check_dts_bounds, | ||
npy_datetimestruct, | ||
pandas_datetime_to_datetimestruct, _string_to_dts, | ||
|
@@ -41,7 +43,6 @@ from nattype cimport NPY_NAT, checknull_with_nat | |
# ---------------------------------------------------------------------- | ||
# Constants | ||
|
||
cdef int64_t DAY_NS = 86400000000000LL | ||
cdef int64_t HOURS_NS = 3600000000000 | ||
NS_DTYPE = np.dtype('M8[ns]') | ||
TD_DTYPE = np.dtype('m8[ns]') | ||
|
@@ -931,10 +932,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, | |
result_b[:] = NPY_NAT | ||
|
||
idx_shifted_left = (np.maximum(0, trans.searchsorted( | ||
vals - DAY_NS, side='right') - 1)).astype(np.int64) | ||
vals - DAY_SECONDS * 1000000000, side='right') - 1)).astype(np.int64) | ||
|
||
idx_shifted_right = (np.maximum(0, trans.searchsorted( | ||
vals + DAY_NS, side='right') - 1)).astype(np.int64) | ||
vals + DAY_SECONDS * 1000000000, side='right') - 1)).astype(np.int64) | ||
|
||
for i in range(n): | ||
val = vals[i] | ||
|
@@ -1116,9 +1117,9 @@ def normalize_date(dt: object) -> datetime: | |
@cython.boundscheck(False) | ||
def normalize_i8_timestamps(int64_t[:] stamps, object tz=None): | ||
""" | ||
Normalize each of the (nanosecond) timestamps in the given array by | ||
rounding down to the beginning of the day (i.e. midnight). If `tz` | ||
is not None, then this is midnight for this timezone. | ||
Normalize each of the (nanosecond) timezone aware timestamps in the given | ||
array by rounding down to the beginning of the day (i.e. midnight). | ||
This is midnight for timezone, `tz`. | ||
|
||
Parameters | ||
---------- | ||
|
@@ -1130,21 +1131,11 @@ def normalize_i8_timestamps(int64_t[:] stamps, object tz=None): | |
result : int64 ndarray of converted of normalized nanosecond timestamps | ||
""" | ||
cdef: | ||
Py_ssize_t i, n = len(stamps) | ||
npy_datetimestruct dts | ||
Py_ssize_t n = len(stamps) | ||
int64_t[:] result = np.empty(n, dtype=np.int64) | ||
|
||
if tz is not None: | ||
tz = maybe_get_tz(tz) | ||
result = _normalize_local(stamps, tz) | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this case never reached? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct. This case (the naive case) is handled in these two places now: https://github.com/pandas-dev/pandas/pull/23634/files#diff-231ac35d2116a12844a7cfed02730580R1289 |
||
with nogil: | ||
for i in range(n): | ||
if stamps[i] == NPY_NAT: | ||
result[i] = NPY_NAT | ||
continue | ||
dt64_to_dtstruct(stamps[i], &dts) | ||
result[i] = _normalized_stamp(&dts) | ||
tz = maybe_get_tz(tz) | ||
result = _normalize_local(stamps, tz) | ||
|
||
return result.base # .base to access underlying np.ndarray | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
from pandas._libs import lib, tslib | ||
from pandas._libs.tslib import Timestamp, NaT, iNaT | ||
from pandas._libs.tslibs import ( | ||
normalize_date, | ||
ccalendar, normalize_date, | ||
conversion, fields, timezones, | ||
resolution as libresolution) | ||
|
||
|
@@ -849,7 +849,14 @@ def normalize(self): | |
'2014-08-01 00:00:00+05:30'], | ||
dtype='datetime64[ns, Asia/Calcutta]', freq=None) | ||
""" | ||
new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz) | ||
if self.tz is None: | ||
not_null = self.notnull() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you use notna |
||
DAY_NS = ccalendar.DAY_SECONDS * 1000000000 | ||
new_values = self.asi8.copy() | ||
adjustment = (new_values[not_null] % DAY_NS) | ||
new_values[not_null] = new_values[not_null] - adjustment | ||
else: | ||
new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz) | ||
return type(self)(new_values, freq='infer').tz_localize(self.tz) | ||
|
||
def to_period(self, freq=None): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -328,6 +328,17 @@ def test_replace_dst_border(self): | |
expected = Timestamp('2013-11-3 03:00:00', tz='America/Chicago') | ||
assert result == expected | ||
|
||
# -------------------------------------------------------------- | ||
# Timestamp.normalize | ||
|
||
@pytest.mark.parametrize('arg', ['2013-11-30', '2013-11-30 12:00:00']) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. is there a normalize_nat test as well? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't define There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We could have one for Timestamp mirroring (another issue). Probably would just return |
||
def test_normalize(self, tz_naive_fixture, arg): | ||
tz = tz_naive_fixture | ||
ts = Timestamp(arg, tz=tz) | ||
result = ts.normalize() | ||
expected = Timestamp('2013-11-30', tz=tz) | ||
assert result == expected | ||
|
||
# -------------------------------------------------------------- | ||
|
||
@td.skip_if_windows | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should probably move this one too (doing it in a future PR is ok)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you move this one as well