Skip to content

Commit 6e04264

Browse files
authored
ENH: add fold support to Timestamp constructor (pandas-dev#31563)
1 parent 1b8b428 commit 6e04264

File tree

11 files changed

+354
-37
lines changed

11 files changed

+354
-37
lines changed

doc/source/user_guide/timeseries.rst

+29
Original file line numberDiff line numberDiff line change
@@ -2297,6 +2297,35 @@ To remove time zone information, use ``tz_localize(None)`` or ``tz_convert(None)
22972297
# tz_convert(None) is identical to tz_convert('UTC').tz_localize(None)
22982298
didx.tz_convert('UTC').tz_localize(None)
22992299
2300+
.. _timeseries.fold:
2301+
2302+
Fold
2303+
~~~~
2304+
2305+
.. versionadded:: 1.1.0
2306+
2307+
For ambiguous times, pandas supports explicitly specifying the keyword-only ``fold`` argument.
2308+
Due to daylight saving time, one wall clock time can occur twice when shifting
2309+
from summer to winter time; fold describes whether the datetime-like corresponds
2310+
to the first (0) or the second time (1) the wall clock hits the ambiguous time.
2311+
Fold is supported only for constructing from naive ``datetime.datetime``
2312+
(see `datetime documentation <https://docs.python.org/3/library/datetime.html>`__ for details) or from :class:`Timestamp`
2313+
or for constructing from components (see below). Only ``dateutil`` timezones are supported
2314+
(see `dateutil documentation <https://dateutil.readthedocs.io/en/stable/tz.html#dateutil.tz.enfold>`__
2315+
for ``dateutil`` methods that deal with ambiguous datetimes) as ``pytz``
2316+
timezones do not support fold (see `pytz documentation <http://pytz.sourceforge.net/index.html>`__
2317+
for details on how ``pytz`` deals with ambiguous datetimes). To localize an ambiguous datetime
2318+
with ``pytz``, please use :meth:`Timestamp.tz_localize`. In general, we recommend relying
2319+
on :meth:`Timestamp.tz_localize` when localizing ambiguous datetimes if you need direct
2320+
control over how they are handled.
2321+
2322+
.. ipython:: python
2323+
2324+
pd.Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0),
2325+
tz='dateutil/Europe/London', fold=0)
2326+
pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30,
2327+
tz='dateutil/Europe/London', fold=1)
2328+
23002329
.. _timeseries.timezone_ambiguous:
23012330

23022331
Ambiguous times when localizing

doc/source/whatsnew/v1.1.0.rst

+22
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,28 @@ For example:
3636
ser["2014"]
3737
ser.loc["May 2015"]
3838
39+
.. _whatsnew_110.timestamp_fold_support:
40+
41+
Fold argument support in Timestamp constructor
42+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
43+
44+
:class:`Timestamp` now supports the keyword-only ``fold`` argument according to `PEP 495 <https://www.python.org/dev/peps/pep-0495/#the-fold-attribute>`_, similar to the parent ``datetime.datetime`` class. It supports both accepting ``fold`` as an initialization argument and inferring ``fold`` from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to ``dateutil`` timezones, as ``pytz`` doesn't support fold.
45+
46+
For example:
47+
48+
.. ipython:: python
49+
50+
ts = pd.Timestamp("2019-10-27 01:30:00+00:00")
51+
ts.fold
52+
53+
.. ipython:: python
54+
55+
ts = pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30,
56+
tz="dateutil/Europe/London", fold=1)
57+
ts
58+
59+
For more on working with fold, see :ref:`Fold subsection <timeseries.fold>` in the user guide.
60+
3961
.. _whatsnew_110.enhancements.other:
4062

4163
Other enhancements

pandas/_libs/tslib.pyx

+20-12
Original file line numberDiff line numberDiff line change
@@ -49,30 +49,31 @@ from pandas._libs.tslibs.tzconversion cimport (
4949

5050
cdef inline object create_datetime_from_ts(
5151
int64_t value, npy_datetimestruct dts,
52-
object tz, object freq):
52+
object tz, object freq, bint fold):
5353
""" convenience routine to construct a datetime.datetime from its parts """
5454
return datetime(dts.year, dts.month, dts.day, dts.hour,
55-
dts.min, dts.sec, dts.us, tz)
55+
dts.min, dts.sec, dts.us, tz, fold=fold)
5656

5757

5858
cdef inline object create_date_from_ts(
5959
int64_t value, npy_datetimestruct dts,
60-
object tz, object freq):
60+
object tz, object freq, bint fold):
6161
""" convenience routine to construct a datetime.date from its parts """
62+
# GH 25057 add fold argument to match other func_create signatures
6263
return date(dts.year, dts.month, dts.day)
6364

6465

6566
cdef inline object create_time_from_ts(
6667
int64_t value, npy_datetimestruct dts,
67-
object tz, object freq):
68+
object tz, object freq, bint fold):
6869
""" convenience routine to construct a datetime.time from its parts """
69-
return time(dts.hour, dts.min, dts.sec, dts.us, tz)
70+
return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold)
7071

7172

7273
@cython.wraparound(False)
7374
@cython.boundscheck(False)
7475
def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
75-
str box="datetime"):
76+
bint fold=0, str box="datetime"):
7677
"""
7778
Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp
7879
@@ -83,6 +84,13 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
8384
convert to this timezone
8485
freq : str/Offset, default None
8586
freq to convert
87+
fold : bint, default 0
88+
Due to daylight saving time, one wall clock time can occur twice
89+
when shifting from summer to winter time; fold describes whether the
90+
datetime-like corresponds to the first (0) or the second time (1)
91+
the wall clock hits the ambiguous time
92+
93+
.. versionadded:: 1.1.0
8694
box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime'
8795
If datetime, convert to datetime.datetime
8896
If date, convert to datetime.date
@@ -104,7 +112,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
104112
str typ
105113
int64_t value, delta, local_value
106114
ndarray[object] result = np.empty(n, dtype=object)
107-
object (*func_create)(int64_t, npy_datetimestruct, object, object)
115+
object (*func_create)(int64_t, npy_datetimestruct, object, object, bint)
108116

109117
if box == "date":
110118
assert (tz is None), "tz should be None when converting to date"
@@ -129,7 +137,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
129137
result[i] = <object>NaT
130138
else:
131139
dt64_to_dtstruct(value, &dts)
132-
result[i] = func_create(value, dts, tz, freq)
140+
result[i] = func_create(value, dts, tz, freq, fold)
133141
elif is_tzlocal(tz):
134142
for i in range(n):
135143
value = arr[i]
@@ -141,7 +149,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
141149
# using the i8 representation.
142150
local_value = tz_convert_utc_to_tzlocal(value, tz)
143151
dt64_to_dtstruct(local_value, &dts)
144-
result[i] = func_create(value, dts, tz, freq)
152+
result[i] = func_create(value, dts, tz, freq, fold)
145153
else:
146154
trans, deltas, typ = get_dst_info(tz)
147155

@@ -155,7 +163,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
155163
else:
156164
# Adjust datetime64 timestamp, recompute datetimestruct
157165
dt64_to_dtstruct(value + delta, &dts)
158-
result[i] = func_create(value, dts, tz, freq)
166+
result[i] = func_create(value, dts, tz, freq, fold)
159167

160168
elif typ == 'dateutil':
161169
# no zone-name change for dateutil tzs - dst etc
@@ -168,7 +176,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
168176
# Adjust datetime64 timestamp, recompute datetimestruct
169177
pos = trans.searchsorted(value, side='right') - 1
170178
dt64_to_dtstruct(value + deltas[pos], &dts)
171-
result[i] = func_create(value, dts, tz, freq)
179+
result[i] = func_create(value, dts, tz, freq, fold)
172180
else:
173181
# pytz
174182
for i in range(n):
@@ -182,7 +190,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
182190
new_tz = tz._tzinfos[tz._transition_info[pos]]
183191

184192
dt64_to_dtstruct(value + deltas[pos], &dts)
185-
result[i] = func_create(value, dts, new_tz, freq)
193+
result[i] = func_create(value, dts, new_tz, freq, fold)
186194

187195
return result
188196

pandas/_libs/tslibs/conversion.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ cdef class _TSObject:
1212
npy_datetimestruct dts # npy_datetimestruct
1313
int64_t value # numpy dt64
1414
object tzinfo
15+
bint fold
1516

1617

1718
cdef convert_to_tsobject(object ts, object tz, object unit,

pandas/_libs/tslibs/conversion.pyx

+66-3
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ from pandas._libs.tslibs.nattype cimport (
3939

4040
from pandas._libs.tslibs.tzconversion import (
4141
tz_localize_to_utc, tz_convert_single)
42-
from pandas._libs.tslibs.tzconversion cimport _tz_convert_tzlocal_utc
42+
from pandas._libs.tslibs.tzconversion cimport (
43+
_tz_convert_tzlocal_utc, _tz_convert_tzlocal_fromutc)
4344

4445
# ----------------------------------------------------------------------
4546
# Constants
@@ -215,6 +216,11 @@ cdef class _TSObject:
215216
# npy_datetimestruct dts # npy_datetimestruct
216217
# int64_t value # numpy dt64
217218
# object tzinfo
219+
# bint fold
220+
221+
def __cinit__(self):
222+
# GH 25057. As per PEP 495, set fold to 0 by default
223+
self.fold = 0
218224

219225
@property
220226
def value(self):
@@ -322,6 +328,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
322328
cdef:
323329
_TSObject obj = _TSObject()
324330

331+
obj.fold = ts.fold
325332
if tz is not None:
326333
tz = maybe_get_tz(tz)
327334

@@ -380,6 +387,8 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts,
380387
_TSObject obj = _TSObject()
381388
int64_t value # numpy dt64
382389
datetime dt
390+
ndarray[int64_t] trans
391+
int64_t[:] deltas
383392

384393
value = dtstruct_to_dt64(&dts)
385394
obj.dts = dts
@@ -389,10 +398,23 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts,
389398
check_overflows(obj)
390399
return obj
391400

401+
# Infer fold from offset-adjusted obj.value
402+
# see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
403+
if is_utc(tz):
404+
pass
405+
elif is_tzlocal(tz):
406+
_tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold)
407+
else:
408+
trans, deltas, typ = get_dst_info(tz)
409+
410+
if typ == 'dateutil':
411+
pos = trans.searchsorted(obj.value, side='right') - 1
412+
obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)
413+
392414
# Keep the converter same as PyDateTime's
393415
dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
394416
obj.dts.hour, obj.dts.min, obj.dts.sec,
395-
obj.dts.us, obj.tzinfo)
417+
obj.dts.us, obj.tzinfo, fold=obj.fold)
396418
obj = convert_datetime_to_tsobject(
397419
dt, tz, nanos=obj.dts.ps // 1000)
398420
return obj
@@ -543,7 +565,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
543565
elif obj.value == NPY_NAT:
544566
pass
545567
elif is_tzlocal(tz):
546-
local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False)
568+
local_val = _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold)
547569
dt64_to_dtstruct(local_val, &obj.dts)
548570
else:
549571
# Adjust datetime64 timestamp, recompute datetimestruct
@@ -562,6 +584,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
562584
# i.e. treat_tz_as_dateutil(tz)
563585
pos = trans.searchsorted(obj.value, side='right') - 1
564586
dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
587+
# dateutil supports fold, so we infer fold from value
588+
obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)
565589
else:
566590
# Note: as of 2018-07-17 all tzinfo objects that are _not_
567591
# either pytz or dateutil have is_fixed_offset(tz) == True,
@@ -571,6 +595,45 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
571595
obj.tzinfo = tz
572596

573597

598+
cdef inline bint _infer_tsobject_fold(_TSObject obj, ndarray[int64_t] trans,
599+
int64_t[:] deltas, int32_t pos):
600+
"""
601+
Infer _TSObject fold property from value by assuming 0 and then setting
602+
to 1 if necessary.
603+
604+
Parameters
605+
----------
606+
obj : _TSObject
607+
trans : ndarray[int64_t]
608+
ndarray of offset transition points in nanoseconds since epoch.
609+
deltas : int64_t[:]
610+
array of offsets corresponding to transition points in trans.
611+
pos : int32_t
612+
Position of the last transition point before taking fold into account.
613+
614+
Returns
615+
-------
616+
bint
617+
Due to daylight saving time, one wall clock time can occur twice
618+
when shifting from summer to winter time; fold describes whether the
619+
datetime-like corresponds to the first (0) or the second time (1)
620+
the wall clock hits the ambiguous time
621+
622+
References
623+
----------
624+
.. [1] "PEP 495 - Local Time Disambiguation"
625+
https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
626+
"""
627+
cdef:
628+
bint fold = 0
629+
630+
if pos > 0:
631+
fold_delta = deltas[pos - 1] - deltas[pos]
632+
if obj.value - fold_delta < trans[pos]:
633+
fold = 1
634+
635+
return fold
636+
574637
cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz):
575638
"""
576639
Take a datetime/Timestamp in UTC and localizes to timezone tz.

pandas/_libs/tslibs/timestamps.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct
55

66
cdef object create_timestamp_from_ts(int64_t value,
77
npy_datetimestruct dts,
8-
object tz, object freq)
8+
object tz, object freq, bint fold)

0 commit comments

Comments
 (0)