Skip to content

Commit 03ff3e0

Browse files
jbrockmendel, phofl, mroeschke, jonashaag
authored and committed
ENH: Timestamp.tz_localize support non-nano (pandas-dev#47355)
* ENH: Timestamp +- timedeltalike scalar support non-nano * ENH: Timestamp.__sub__(datetime) with non-nano * better exception message * BUG: concat not sorting mixed column names when None is included (pandas-dev#47331) * REGR: concat not sorting columns for mixed column names * Fix none in columns * BUG: concat not sorting column names when None is included * Update doc/source/whatsnew/v1.5.0.rst Co-authored-by: Matthew Roeschke <[email protected]> * Add gh reference Co-authored-by: Matthew Roeschke <[email protected]> * Add run-tests action (pandas-dev#47292) * Add run-tests action * Fix * Fix * Fix * Update macos-windows.yml * Update posix.yml * Update python-dev.yml * Update action.yml * Update macos-windows.yml * Update posix.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * Update python-dev.yml * ENH: Timestamp pickle support non-nano tzaware (pandas-dev#47340) * ENH: Timestamp.tz_localize support non-nano Co-authored-by: Patrick Hoefler <[email protected]> Co-authored-by: Matthew Roeschke <[email protected]> Co-authored-by: Jonas Haag <[email protected]>
1 parent 97cbb42 commit 03ff3e0

File tree

8 files changed

+113
-59
lines changed

8 files changed

+113
-59
lines changed

pandas/_libs/tslibs/ccalendar.pxd

-2
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,6 @@ cpdef int32_t get_day_of_year(int year, int month, int day) nogil
1515
cpdef int get_lastbday(int year, int month) nogil
1616
cpdef int get_firstbday(int year, int month) nogil
1717

18-
cdef int64_t DAY_NANOS
19-
cdef int64_t HOUR_NANOS
2018
cdef dict c_MONTH_NUMBERS
2119

2220
cdef int32_t* month_offset

pandas/_libs/tslibs/ccalendar.pyx

-5
Original file line number | Diff line number | Diff line change
@@ -47,11 +47,6 @@ DAYS_FULL = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
4747
int_to_weekday = {num: name for num, name in enumerate(DAYS)}
4848
weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday}
4949

50-
DAY_SECONDS = 86400
51-
HOUR_SECONDS = 3600
52-
53-
cdef const int64_t DAY_NANOS = DAY_SECONDS * 1_000_000_000
54-
cdef const int64_t HOUR_NANOS = HOUR_SECONDS * 1_000_000_000
5550

5651
# ----------------------------------------------------------------------
5752

pandas/_libs/tslibs/period.pyx

-1
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,6 @@ from pandas._libs.tslibs.np_datetime cimport (
6060
from pandas._libs.tslibs.timestamps import Timestamp
6161

6262
from pandas._libs.tslibs.ccalendar cimport (
63-
c_MONTH_NUMBERS,
6463
dayofweek,
6564
get_day_of_year,
6665
get_days_in_month,

pandas/_libs/tslibs/timestamps.pyx

+5-10
Original file line number | Diff line number | Diff line change
@@ -84,13 +84,13 @@ from pandas._libs.tslibs.np_datetime cimport (
8484
check_dts_bounds,
8585
cmp_dtstructs,
8686
cmp_scalar,
87-
dt64_to_dtstruct,
8887
get_datetime64_unit,
8988
get_datetime64_value,
9089
get_unit_from_dtype,
9190
npy_datetimestruct,
91+
npy_datetimestruct_to_datetime,
9292
pandas_datetime_to_datetimestruct,
93-
pydatetime_to_dt64,
93+
pydatetime_to_dtstruct,
9494
)
9595

9696
from pandas._libs.tslibs.np_datetime import (
@@ -530,7 +530,8 @@ cdef class _Timestamp(ABCTimestamp):
530530
npy_datetimestruct dts
531531

532532
if own_tz is not None and not is_utc(own_tz):
533-
val = pydatetime_to_dt64(self, &dts) + self.nanosecond
533+
pydatetime_to_dtstruct(self, &dts)
534+
val = npy_datetimestruct_to_datetime(self._reso, &dts) + self.nanosecond
534535
else:
535536
val = self.value
536537
return val
@@ -2044,11 +2045,6 @@ default 'raise'
20442045
>>> pd.NaT.tz_localize()
20452046
NaT
20462047
"""
2047-
if self._reso != NPY_FR_ns:
2048-
if tz is None and self.tz is None:
2049-
return self
2050-
raise NotImplementedError(self._reso)
2051-
20522048
if ambiguous == 'infer':
20532049
raise ValueError('Cannot infer offset with only one time.')
20542050

@@ -2077,7 +2073,7 @@ default 'raise'
20772073
"Cannot localize tz-aware Timestamp, use tz_convert for conversions"
20782074
)
20792075

2080-
out = Timestamp(value, tz=tz)
2076+
out = type(self)._from_value_and_reso(value, self._reso, tz=tz)
20812077
if out is not NaT:
20822078
out._set_freq(self._freq) # avoid warning in constructor
20832079
return out
@@ -2124,7 +2120,6 @@ default 'raise'
21242120
>>> pd.NaT.tz_convert(tz='Asia/Tokyo')
21252121
NaT
21262122
"""
2127-
21282123
if self.tzinfo is None:
21292124
# tz naive, use tz_localize
21302125
raise TypeError(

pandas/_libs/tslibs/tzconversion.pyx

+45-22
Original file line number | Diff line number | Diff line change
@@ -27,11 +27,10 @@ from numpy cimport (
2727

2828
cnp.import_array()
2929

30-
from pandas._libs.tslibs.ccalendar cimport (
31-
DAY_NANOS,
32-
HOUR_NANOS,
30+
from pandas._libs.tslibs.dtypes cimport (
31+
periods_per_day,
32+
periods_per_second,
3333
)
34-
from pandas._libs.tslibs.dtypes cimport periods_per_second
3534
from pandas._libs.tslibs.nattype cimport NPY_NAT
3635
from pandas._libs.tslibs.np_datetime cimport (
3736
NPY_DATETIMEUNIT,
@@ -153,6 +152,7 @@ cdef int64_t tz_localize_to_utc_single(
153152
return val
154153

155154
elif is_utc(tz) or tz is None:
155+
# TODO: test with non-nano
156156
return val
157157

158158
elif is_tzlocal(tz) or is_zoneinfo(tz):
@@ -161,6 +161,15 @@ cdef int64_t tz_localize_to_utc_single(
161161
elif is_fixed_offset(tz):
162162
_, deltas, _ = get_dst_info(tz)
163163
delta = deltas[0]
164+
# TODO: de-duplicate with Localizer.__init__
165+
if reso != NPY_DATETIMEUNIT.NPY_FR_ns:
166+
if reso == NPY_DATETIMEUNIT.NPY_FR_us:
167+
delta = delta // 1000
168+
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
169+
delta = delta // 1_000_000
170+
elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
171+
delta = delta // 1_000_000_000
172+
164173
return val - delta
165174

166175
else:
@@ -229,6 +238,7 @@ timedelta-like}
229238
bint fill_nonexist = False
230239
str stamp
231240
Localizer info = Localizer(tz, reso=reso)
241+
int64_t pph = periods_per_day(reso) // 24
232242

233243
# Vectorized version of DstTzInfo.localize
234244
if info.use_utc:
@@ -242,7 +252,9 @@ timedelta-like}
242252
if v == NPY_NAT:
243253
result[i] = NPY_NAT
244254
else:
245-
result[i] = v - _tz_localize_using_tzinfo_api(v, tz, to_utc=True, reso=reso)
255+
result[i] = v - _tz_localize_using_tzinfo_api(
256+
v, tz, to_utc=True, reso=reso
257+
)
246258
return result.base # to return underlying ndarray
247259

248260
elif info.use_fixed:
@@ -283,20 +295,22 @@ timedelta-like}
283295
shift_backward = True
284296
elif PyDelta_Check(nonexistent):
285297
from .timedeltas import delta_to_nanoseconds
286-
shift_delta = delta_to_nanoseconds(nonexistent)
298+
shift_delta = delta_to_nanoseconds(nonexistent, reso=reso)
287299
elif nonexistent not in ('raise', None):
288300
msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', "
289301
"shift_backwards} or a timedelta object")
290302
raise ValueError(msg)
291303

292304
# Determine whether each date lies left of the DST transition (store in
293305
# result_a) or right of the DST transition (store in result_b)
294-
result_a, result_b =_get_utc_bounds(vals, info.tdata, info.ntrans, info.deltas)
306+
result_a, result_b =_get_utc_bounds(
307+
vals, info.tdata, info.ntrans, info.deltas, reso=reso
308+
)
295309

296310
# silence false-positive compiler warning
297311
dst_hours = np.empty(0, dtype=np.int64)
298312
if infer_dst:
299-
dst_hours = _get_dst_hours(vals, result_a, result_b)
313+
dst_hours = _get_dst_hours(vals, result_a, result_b, reso=reso)
300314

301315
# Pre-compute delta_idx_offset that will be used if we go down non-existent
302316
# paths.
@@ -316,46 +330,51 @@ timedelta-like}
316330
left = result_a[i]
317331
right = result_b[i]
318332
if val == NPY_NAT:
333+
# TODO: test with non-nano
319334
result[i] = val
320335
elif left != NPY_NAT and right != NPY_NAT:
321336
if left == right:
337+
# TODO: test with non-nano
322338
result[i] = left
323339
else:
324340
if infer_dst and dst_hours[i] != NPY_NAT:
341+
# TODO: test with non-nano
325342
result[i] = dst_hours[i]
326343
elif is_dst:
327344
if ambiguous_array[i]:
328345
result[i] = left
329346
else:
330347
result[i] = right
331348
elif fill:
349+
# TODO: test with non-nano; parametrize test_dt_round_tz_ambiguous
332350
result[i] = NPY_NAT
333351
else:
334-
stamp = _render_tstamp(val)
352+
stamp = _render_tstamp(val, reso=reso)
335353
raise pytz.AmbiguousTimeError(
336354
f"Cannot infer dst time from {stamp}, try using the "
337355
"'ambiguous' argument"
338356
)
339357
elif left != NPY_NAT:
340358
result[i] = left
341359
elif right != NPY_NAT:
360+
# TODO: test with non-nano
342361
result[i] = right
343362
else:
344363
# Handle nonexistent times
345364
if shift_forward or shift_backward or shift_delta != 0:
346365
# Shift the nonexistent time to the closest existing time
347-
remaining_mins = val % HOUR_NANOS
366+
remaining_mins = val % pph
348367
if shift_delta != 0:
349368
# Validate that we don't relocalize on another nonexistent
350369
# time
351-
if -1 < shift_delta + remaining_mins < HOUR_NANOS:
370+
if -1 < shift_delta + remaining_mins < pph:
352371
raise ValueError(
353372
"The provided timedelta will relocalize on a "
354373
f"nonexistent time: {nonexistent}"
355374
)
356375
new_local = val + shift_delta
357376
elif shift_forward:
358-
new_local = val + (HOUR_NANOS - remaining_mins)
377+
new_local = val + (pph - remaining_mins)
359378
else:
360379
# Subtract 1 since the beginning hour is _inclusive_ of
361380
# nonexistent times
@@ -368,7 +387,7 @@ timedelta-like}
368387
elif fill_nonexist:
369388
result[i] = NPY_NAT
370389
else:
371-
stamp = _render_tstamp(val)
390+
stamp = _render_tstamp(val, reso=reso)
372391
raise pytz.NonExistentTimeError(stamp)
373392

374393
return result.base # .base to get underlying ndarray
@@ -404,17 +423,19 @@ cdef inline Py_ssize_t bisect_right_i8(int64_t *data,
404423
return left
405424

406425

407-
cdef inline str _render_tstamp(int64_t val):
426+
cdef inline str _render_tstamp(int64_t val, NPY_DATETIMEUNIT reso):
408427
""" Helper function to render exception messages"""
409428
from pandas._libs.tslibs.timestamps import Timestamp
410-
return str(Timestamp(val))
429+
ts = Timestamp._from_value_and_reso(val, reso, None)
430+
return str(ts)
411431

412432

413433
cdef _get_utc_bounds(
414434
ndarray vals,
415435
int64_t* tdata,
416436
Py_ssize_t ntrans,
417437
const int64_t[::1] deltas,
438+
NPY_DATETIMEUNIT reso,
418439
):
419440
# Determine whether each date lies left of the DST transition (store in
420441
# result_a) or right of the DST transition (store in result_b)
@@ -424,6 +445,7 @@ cdef _get_utc_bounds(
424445
Py_ssize_t i, n = vals.size
425446
int64_t val, v_left, v_right
426447
Py_ssize_t isl, isr, pos_left, pos_right
448+
int64_t ppd = periods_per_day(reso)
427449

428450
result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
429451
result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
@@ -438,8 +460,8 @@ cdef _get_utc_bounds(
438460
if val == NPY_NAT:
439461
continue
440462

441-
# TODO: be careful of overflow in val-DAY_NANOS
442-
isl = bisect_right_i8(tdata, val - DAY_NANOS, ntrans) - 1
463+
# TODO: be careful of overflow in val-ppd
464+
isl = bisect_right_i8(tdata, val - ppd, ntrans) - 1
443465
if isl < 0:
444466
isl = 0
445467

@@ -449,8 +471,8 @@ cdef _get_utc_bounds(
449471
if v_left + deltas[pos_left] == val:
450472
result_a[i] = v_left
451473

452-
# TODO: be careful of overflow in val+DAY_NANOS
453-
isr = bisect_right_i8(tdata, val + DAY_NANOS, ntrans) - 1
474+
# TODO: be careful of overflow in val+ppd
475+
isr = bisect_right_i8(tdata, val + ppd, ntrans) - 1
454476
if isr < 0:
455477
isr = 0
456478

@@ -465,10 +487,11 @@ cdef _get_utc_bounds(
465487

466488
@cython.boundscheck(False)
467489
cdef ndarray[int64_t] _get_dst_hours(
468-
# vals only needed here to potential render an exception message
490+
# vals, reso only needed here to potential render an exception message
469491
const int64_t[:] vals,
470492
ndarray[int64_t] result_a,
471493
ndarray[int64_t] result_b,
494+
NPY_DATETIMEUNIT reso,
472495
):
473496
cdef:
474497
Py_ssize_t i, n = vals.shape[0]
@@ -497,7 +520,7 @@ cdef ndarray[int64_t] _get_dst_hours(
497520

498521
if trans_idx.size == 1:
499522
# TODO: not reached in tests 2022-05-02; possible?
500-
stamp = _render_tstamp(vals[trans_idx[0]])
523+
stamp = _render_tstamp(vals[trans_idx[0]], reso=reso)
501524
raise pytz.AmbiguousTimeError(
502525
f"Cannot infer dst time from {stamp} as there "
503526
"are no repeated times"
@@ -519,7 +542,7 @@ cdef ndarray[int64_t] _get_dst_hours(
519542
delta = np.diff(result_a[grp])
520543
if grp.size == 1 or np.all(delta > 0):
521544
# TODO: not reached in tests 2022-05-02; possible?
522-
stamp = _render_tstamp(vals[grp[0]])
545+
stamp = _render_tstamp(vals[grp[0]], reso=reso)
523546
raise pytz.AmbiguousTimeError(stamp)
524547

525548
# Find the index for the switch and pull from a for dst and b

pandas/_libs/tslibs/vectorized.pyx

-1
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ cnp.import_array()
1919

2020
from .dtypes import Resolution
2121

22-
from .ccalendar cimport DAY_NANOS
2322
from .dtypes cimport (
2423
c_Resolution,
2524
periods_per_day,

0 commit comments

Comments
 (0)