Skip to content

ENH/BUG: Add is_dst method to DatetimeIndex and Timestamp to solve AmbiguousTimeError #22560

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
3 changes: 3 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,7 @@ These can be accessed like ``Series.dt.<property>``.
Series.dt.to_pydatetime
Series.dt.tz_localize
Series.dt.tz_convert
Series.dt.is_dst
Series.dt.normalize
Series.dt.strftime
Series.dt.round
Expand Down Expand Up @@ -1778,6 +1779,7 @@ Time-specific operations
DatetimeIndex.snap
DatetimeIndex.tz_convert
DatetimeIndex.tz_localize
DatetimeIndex.is_dst
DatetimeIndex.round
DatetimeIndex.floor
DatetimeIndex.ceil
Expand Down Expand Up @@ -1985,6 +1987,7 @@ Methods
Timestamp.isocalendar
Timestamp.isoformat
Timestamp.isoweekday
Timestamp.is_dst
Timestamp.month_name
Timestamp.normalize
Timestamp.now
Expand Down
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ Other Enhancements
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
- :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`).
- :class:`DatetimeIndex` and :class:`Timestamp` have gained an ``is_dst`` method (:issue:`18885`, :issue:`18946`)

.. _whatsnew_0240.api_breaking:

Expand Down Expand Up @@ -615,6 +616,8 @@ Timezones
- Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`)
- Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`)
- Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`)
- Bug in :meth:`DatetimeIndex.floor` that raised an ``AmbiguousTimeError`` during a DST transition (:issue:`18946`)
- Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`)

Offsets
^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"):
dt64_to_dtstruct(local_value, &dts)
result[i] = func_create(value, dts, tz, freq)
else:
trans, deltas, typ = get_dst_info(tz)
trans, deltas, typ = get_dst_info(tz, False)

if typ not in ['pytz', 'dateutil']:
# static/fixed; in this case we know that len(delta) == 1
Expand Down
13 changes: 6 additions & 7 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,10 @@ from np_datetime import OutOfBoundsDatetime

from util cimport (is_string_object,
is_datetime64_object,
is_integer_object, is_float_object, is_array)
is_integer_object, is_float_object)

from timedeltas cimport cast_from_unit
from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
treat_tz_as_dateutil, treat_tz_as_pytz,
get_utcoffset, get_dst_info,
get_timezone, maybe_get_tz, tz_compare)
from parsing import parse_datetime_string
Expand Down Expand Up @@ -540,7 +539,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
dt64_to_dtstruct(local_val, &obj.dts)
else:
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)
trans, deltas, typ = get_dst_info(tz, False)

if is_fixed_offset(tz):
# static/fixed tzinfo; in this case we know len(deltas) == 1
Expand Down Expand Up @@ -636,7 +635,7 @@ cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz,
int64_t[:] deltas
int64_t v

trans, deltas, typ = get_dst_info(tz)
trans, deltas, typ = get_dst_info(tz, False)
if not to_utc:
# We add `offset` below instead of subtracting it
deltas = -1 * np.array(deltas, dtype='i8')
Expand Down Expand Up @@ -888,7 +887,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
"the same size as vals")
ambiguous_array = np.asarray(ambiguous)

trans, deltas, typ = get_dst_info(tz)
trans, deltas, typ = get_dst_info(tz, False)

tdata = <int64_t*> cnp.PyArray_DATA(trans)
ntrans = len(trans)
Expand Down Expand Up @@ -1150,7 +1149,7 @@ cdef int64_t[:] _normalize_local(int64_t[:] stamps, object tz):
result[i] = _normalized_stamp(&dts)
else:
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)
trans, deltas, typ = get_dst_info(tz, False)

if typ not in ['pytz', 'dateutil']:
# static/fixed; in this case we know that len(delta) == 1
Expand Down Expand Up @@ -1227,7 +1226,7 @@ def is_date_array_normalized(int64_t[:] stamps, tz=None):
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
return False
else:
trans, deltas, typ = get_dst_info(tz)
trans, deltas, typ = get_dst_info(tz, False)

if typ not in ['pytz', 'dateutil']:
# static/fixed; in this case we know that len(delta) == 1
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/nattype.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,6 @@ class NaTType(_NaT):
strptime = _make_error_func('strptime', datetime)
strftime = _make_error_func('strftime', datetime)
isocalendar = _make_error_func('isocalendar', datetime)
dst = _make_error_func('dst', datetime)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why removing?

ctime = _make_error_func('ctime', datetime)
time = _make_error_func('time', datetime)
toordinal = _make_error_func('toordinal', datetime)
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1516,7 +1516,7 @@ cdef int64_t[:] localize_dt64arr_to_period(int64_t[:] stamps,
result[i] = get_period_ordinal(&dts, freq)
else:
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)
trans, deltas, typ = get_dst_info(tz, False)

if typ not in ['pytz', 'dateutil']:
# static/fixed; in this case we know that len(delta) == 1
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/resolution.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ cdef _reso_local(int64_t[:] stamps, object tz):
reso = curr_reso
else:
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)
trans, deltas, typ = get_dst_info(tz, False)

if typ not in ['pytz', 'dateutil']:
# static/fixed; in this case we know that len(delta) == 1
Expand Down
14 changes: 14 additions & 0 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,20 @@ class Timestamp(_Timestamp):
raise AttributeError("Cannot directly set timezone. Use tz_localize() "
"or tz_convert() as appropriate")

def is_dst(self):
    """
    Indicate whether the Timestamp is in daylight savings time.

    The answer is the truth value of ``self.dst()``. Naive timestamps are
    considered not to be in daylight savings time.

    Returns
    -------
    bool
        True if the Timestamp is in daylight savings time
        False if the Timestamp is naive or not in daylight savings time
    """
    # ``dst()`` yields either nothing or an offset; only a non-zero
    # offset is truthy.
    dst_offset = self.dst()
    return True if dst_offset else False

def __setstate__(self, state):
self.value = state[0]
self.freq = state[1]
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/timezones.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ cpdef object maybe_get_tz(object tz)
cdef get_utcoffset(tzinfo, obj)
cdef bint is_fixed_offset(object tz)

cdef object get_dst_info(object tz)
cdef object get_dst_info(object tz, dst)
75 changes: 69 additions & 6 deletions pandas/_libs/tslibs/timezones.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -186,16 +186,28 @@ cdef object get_utc_trans_times_from_dateutil_tz(object tz):
return new_trans


cdef int64_t[:] unbox_utcoffsets(object transinfo):
cdef int64_t[:] unbox_utcoffsets(object transinfo, dst):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add bint to type dst

"""
Unpack the offset information from pytz timezone objects

Parameters
----------
transinfo : list of tuples
Each tuple contains (UTC offset, DST offset, tz abbreviation)
dst : boolean
True returns an array of the DST offsets
False returns an array of UTC offsets
"""
cdef:
Py_ssize_t i, sz
int64_t[:] arr
int key

sz = len(transinfo)
arr = np.empty(sz, dtype='i8')

key = int(dst)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you may not need this if typing bint

for i in range(sz):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment about what you are extracting

arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000
arr[i] = int(transinfo[i][key].total_seconds()) * 1000000000

return arr

Expand All @@ -204,9 +216,22 @@ cdef int64_t[:] unbox_utcoffsets(object transinfo):
# Daylight Savings


cdef object get_dst_info(object tz):
cdef object get_dst_info(object tz, dst):
"""
return a tuple of :
Return DST info from a timezone

Parameters
----------
tz : object
timezone
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we accept only timezone objects and not strings here?

dst : bool
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

type with bint

True returns the DST specific offset
False returns the UTC offset
Specific for pytz timezones only

Returns
-------
tuple
(UTC times of DST transitions,
UTC offsets in nanoseconds corresponding to DST transitions,
string of type of transitions)
Expand All @@ -230,7 +255,7 @@ cdef object get_dst_info(object tz):
trans[0] = NPY_NAT + 1
except Exception:
pass
deltas = unbox_utcoffsets(tz._transition_info)
deltas = unbox_utcoffsets(tz._transition_info, dst)
typ = 'pytz'

elif treat_tz_as_dateutil(tz):
Expand Down Expand Up @@ -278,6 +303,44 @@ cdef object get_dst_info(object tz):
return dst_cache[cache_key]


def _is_dst(int64_t[:] values, object tz):
"""
Return a boolean array indicating whether each epoch timestamp is in
daylight savings time with respect to the passed timezone.

Parameters
----------
values : ndarray
i8 representation of the datetimes
tz : object
timezone

Returns
-------
ndarray of booleans
True indicates daylight savings time
"""
cdef:
Py_ssize_t n = len(values)
# Cython boolean memoryviews are not supported yet
# https://github.com/cython/cython/issues/2204
# bint[:] result
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the workaround is to use a uint8

object typ

result = np.zeros(n, dtype=bool)
if tz is None:
return result
transitions, offsets, typ = get_dst_info(tz, True)
offsets = np.array(offsets)
# Fixed timezone offsets do not have DST transitions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

new line here

if typ not in {'pytz', 'dateutil'}:
return result
positions = transitions.searchsorted(values, side='right')
# DST has 0 offset
result = offsets[positions] == 0
return result


def infer_tzinfo(start, end):
if start is not None and end is not None:
tz = start.tzinfo
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def _ensure_localized(self, result):
if getattr(self, 'tz', None) is not None:
if not isinstance(result, ABCIndexClass):
result = self._simple_new(result)
result = result.tz_localize(self.tz)
result = result.tz_localize(self.tz, ambiguous=self.is_dst())
return result

def _box_values_as_index(self):
Expand Down
32 changes: 31 additions & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def _add_comparison_methods(cls):
_datetimelike_methods = ['to_period', 'tz_localize',
'tz_convert',
'normalize', 'strftime', 'round', 'floor',
'ceil', 'month_name', 'day_name']
'ceil', 'month_name', 'day_name', 'is_dst']

_is_numeric_dtype = False
_infer_as_myclass = True
Expand Down Expand Up @@ -443,6 +443,36 @@ def tz(self, value):
raise AttributeError("Cannot directly set timezone. Use tz_localize() "
"or tz_convert() as appropriate")

def is_dst(self):
    """
    Indicate which timestamps are in daylight savings time.

    Builds an Index of booleans, one per timestamp in this index.

    If the DatetimeIndex does not have a timezone, returns an Index
    whose values are all False.

    Returns
    -------
    Index
        True if the timestamp is in daylight savings time else False

    Examples
    --------
    >>> dti = pd.date_range('2018-11-04', periods=4, freq='H',
    ...                     tz='US/Pacific')

    >>> dti
    DatetimeIndex(['2018-11-04 00:00:00-07:00',
                   '2018-11-04 01:00:00-07:00',
                   '2018-11-04 01:00:00-08:00',
                   '2018-11-04 02:00:00-08:00'],
                  dtype='datetime64[ns, US/Pacific]', freq='H')

    >>> dti.is_dst()
    Index([True, True, False, False], dtype='object')
    """
    # The flags are computed from the i8 values and the index timezone,
    # then boxed into an Index.
    dst_flags = timezones._is_dst(self.asi8, self.tz)
    return Index(dst_flags)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make is_dst public?


@property
def size(self):
# TODO: Remove this when we have a DatetimeTZArray
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/indexes/datetimes/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,6 +1012,32 @@ def test_iteration_preserves_nanoseconds(self, tz):
for i, ts in enumerate(index):
assert ts == index[i]

def test_is_dst(self):
    """Exercise DatetimeIndex.is_dst on several kinds of index."""
    # An empty index gives an empty result.
    empty = DatetimeIndex([])
    tm.assert_index_equal(empty.is_dst(), Index([]))

    # Four hourly US/Pacific stamps: the first two are flagged, the
    # last two are not.
    aware = date_range('2018-11-04', periods=4, freq='H', tz='US/Pacific')
    tm.assert_index_equal(aware.is_dst(),
                          Index([True, True, False, False]))

    # With the timezone dropped, nothing is flagged.
    naive = aware.tz_localize(None)
    tm.assert_index_equal(naive.is_dst(), Index([False] * 4))

    # With a fixed-offset timezone, nothing is flagged.
    fixed = aware.tz_localize(pytz.FixedOffset(300))
    tm.assert_index_equal(fixed.is_dst(), Index([False] * 4))

    # A tz-naive index holding only NaT is not flagged.
    only_nat = pd.DatetimeIndex([pd.NaT])
    tm.assert_index_equal(only_nat.is_dst(), Index([False]))


class TestDateRange(object):
"""Tests for date_range with timezones"""
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,30 @@ def test_merge_on_datetime64tz(self):
assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]'
assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]'

def test_merge_datetime64tz_with_dst_transition(self):
    # GH 18885
    # Left frame: four hourly Europe/Madrid stamps starting 2017-10-29 01:00.
    left_dates = pd.date_range(
        '2017-10-29 01:00', periods=4, freq='H', tz='Europe/Madrid')
    left = pd.DataFrame(left_dates, columns=['date'])
    left['value'] = 1

    # Right frame: three naive stamps, localized as UTC and then
    # converted to Europe/Madrid.
    naive_stamps = [pd.to_datetime(text) for text in (
        '2017-10-29 03:00:00',
        '2017-10-29 04:00:00',
        '2017-10-29 05:00:00')]
    right = pd.DataFrame(naive_stamps, columns=['date'])
    as_utc = right['date'].dt.tz_localize('UTC')
    right['date'] = as_utc.dt.tz_convert('Europe/Madrid')
    right['value'] = 2

    merged = pd.merge(left, right, how='outer', on='date')

    # The outer merge should give seven consecutive hourly stamps, with
    # each side's value present only on its own rows.
    all_dates = pd.date_range(
        '2017-10-29 01:00', periods=7, freq='H', tz='Europe/Madrid')
    expected = pd.DataFrame({
        'date': all_dates,
        'value_x': [1] * 4 + [np.nan] * 3,
        'value_y': [np.nan] * 4 + [2] * 3
    })
    assert_frame_equal(merged, expected)

def test_merge_non_unique_period_index(self):
# GH #16871
index = pd.period_range('2016-01-01', periods=16, freq='M')
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/scalar/test_nat.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,3 +330,7 @@ def test_nat_arithmetic_td64_vector(box, assert_func):
def test_nat_pinned_docstrings():
# GH17327
assert NaT.ctime.__doc__ == datetime.ctime.__doc__


def test_is_dst():
    """NaT.is_dst() must return exactly False."""
    outcome = NaT.is_dst()
    assert outcome is False
Loading