diff --git a/doc/source/api.rst b/doc/source/api.rst index 77d37ec2a7b2e..ded4e1e3c55e9 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -582,6 +582,7 @@ These can be accessed like ``Series.dt.``. Series.dt.to_pydatetime Series.dt.tz_localize Series.dt.tz_convert + Series.dt.is_dst Series.dt.normalize Series.dt.strftime Series.dt.round @@ -1778,6 +1779,7 @@ Time-specific operations DatetimeIndex.snap DatetimeIndex.tz_convert DatetimeIndex.tz_localize + DatetimeIndex.is_dst DatetimeIndex.round DatetimeIndex.floor DatetimeIndex.ceil @@ -1985,6 +1987,7 @@ Methods Timestamp.isocalendar Timestamp.isoformat Timestamp.isoweekday + Timestamp.is_dst Timestamp.month_name Timestamp.normalize Timestamp.now diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3a360b09ae789..ff0bbeeb34870 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -184,6 +184,7 @@ Other Enhancements - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). +- :class:`DatetimeIndex` and :class:`Timestamp` have gained an ``is_dst`` method (:issue:`18885`, :issue:`18946`) .. 
_whatsnew_0240.api_breaking: @@ -619,6 +620,8 @@ Timezones - Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`) - Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) - Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`) +- Bug in :meth:`DatetimeIndex.floor` that raised an ``AmbiguousTimeError`` during a DST transition (:issue:`18946`) +- Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`) Offsets ^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index bdd279b19208b..fc53b88fd4cfe 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -146,7 +146,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): dt64_to_dtstruct(local_value, &dts) result[i] = func_create(value, dts, tz, freq) else: - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if typ not in ['pytz', 'dateutil']: # static/fixed; in this case we know that len(delta) == 1 diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fe664cf03b0b9..14939427c2da3 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -28,11 +28,10 @@ from np_datetime import OutOfBoundsDatetime from util cimport (is_string_object, is_datetime64_object, - is_integer_object, is_float_object, is_array) + is_integer_object, is_float_object) from timedeltas cimport cast_from_unit from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, - treat_tz_as_dateutil, treat_tz_as_pytz, get_utcoffset, get_dst_info, get_timezone, maybe_get_tz, tz_compare) from parsing import parse_datetime_string @@ -540,7 +539,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): dt64_to_dtstruct(local_val, &obj.dts) else: # Adjust datetime64 timestamp, recompute 
datetimestruct - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if is_fixed_offset(tz): # static/fixed tzinfo; in this case we know len(deltas) == 1 @@ -636,7 +635,7 @@ cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz, int64_t[:] deltas int64_t v - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if not to_utc: # We add `offset` below instead of subtracting it deltas = -1 * np.array(deltas, dtype='i8') @@ -888,7 +887,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, "the same size as vals") ambiguous_array = np.asarray(ambiguous) - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) tdata = cnp.PyArray_DATA(trans) ntrans = len(trans) @@ -1150,7 +1149,7 @@ cdef int64_t[:] _normalize_local(int64_t[:] stamps, object tz): result[i] = _normalized_stamp(&dts) else: # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if typ not in ['pytz', 'dateutil']: # static/fixed; in this case we know that len(delta) == 1 @@ -1227,7 +1226,7 @@ def is_date_array_normalized(int64_t[:] stamps, tz=None): if (dts.hour + dts.min + dts.sec + dts.us) > 0: return False else: - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if typ not in ['pytz', 'dateutil']: # static/fixed; in this case we know that len(delta) == 1 diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 08d9128ff660c..e05572f66525d 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -260,6 +260,20 @@ class NaTType(_NaT): def is_year_end(self): return False + def is_dst(self): + """ + Returns a boolean indicating if the Timestamp is in daylight savings + time. Naive timestamps are considered not to be in daylight savings + time. 
+ + Returns + ------- + Boolean + True if the Timestamp is in daylight savings time + False if the Timestamp is naive or not in daylight savings time + """ + return False + def __rdiv__(self, other): return _nat_rdivide_op(self, other) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index f68b6d8fdef57..6c29fd0280b02 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1516,7 +1516,7 @@ cdef int64_t[:] localize_dt64arr_to_period(int64_t[:] stamps, result[i] = get_period_ordinal(&dts, freq) else: # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if typ not in ['pytz', 'dateutil']: # static/fixed; in this case we know that len(delta) == 1 diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 4e3350395400c..7ec3c0062c9e3 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -68,7 +68,7 @@ cdef _reso_local(int64_t[:] stamps, object tz): reso = curr_reso else: # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if typ not in ['pytz', 'dateutil']: # static/fixed; in this case we know that len(delta) == 1 diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3ab1396c0fe38..c06288f6cf23c 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -722,6 +722,20 @@ class Timestamp(_Timestamp): raise AttributeError("Cannot directly set timezone. Use tz_localize() " "or tz_convert() as appropriate") + def is_dst(self): + """ + Returns a boolean indicating if the Timestamp is in daylight savings + time. Naive timestamps are considered not to be in daylight savings + time. 
+ + Returns + ------- + Boolean + True if the Timestamp is in daylight savings time + False if the Timestamp is naive or not in daylight savings time + """ + return bool(self.dst()) + def __setstate__(self, state): self.value = state[0] self.freq = state[1] diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 8965b46f747c4..638bd0e79c806 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -13,4 +13,4 @@ cpdef object maybe_get_tz(object tz) cdef get_utcoffset(tzinfo, obj) cdef bint is_fixed_offset(object tz) -cdef object get_dst_info(object tz) +cdef object get_dst_info(object tz, bint dst) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 36ec499c7335c..e32cab36a4e82 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -108,7 +108,8 @@ def _p_tz_cache_key(tz): return tz_cache_key(tz) -# Timezone data caches, key is the pytz string or dateutil file name. +# Timezone data (UTC offset) caches +# key is the pytz string or dateutil file name. 
dst_cache = {} @@ -186,16 +187,30 @@ cdef object get_utc_trans_times_from_dateutil_tz(object tz): return new_trans -cdef int64_t[:] unbox_utcoffsets(object transinfo): +cdef int64_t[:] unbox_utcoffsets(object transinfo, bint dst): + """ + Unpack the offset information from the _transition_info attribute of pytz + timezones + + Parameters + ---------- + transinfo : list of tuples + Each tuple contains (UTC offset, DST offset, tz abbreviation) + dst : boolean + True returns an array of the DST offsets + False returns an array of UTC offsets + """ cdef: Py_ssize_t i, sz int64_t[:] arr + int key sz = len(transinfo) arr = np.empty(sz, dtype='i8') - for i in range(sz): - arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000 + # If dst == True, extract the DST shift in nanoseconds + # If dst == False, extract the UTC offset in nanoseconds + arr[i] = int(transinfo[i][dst].total_seconds()) * 1000000000 return arr @@ -204,9 +219,23 @@ cdef int64_t[:] unbox_utcoffsets(object transinfo): # Daylight Savings -cdef object get_dst_info(object tz): +cdef object get_dst_info(object tz, bint dst): """ - return a tuple of : + Return DST info from a timezone + + Parameters + ---------- + tz : object + timezone object + dst : bint + True returns the DST specific offset and will NOT store the results in + dst_cache. dst_cache is reserved for caching UTC offsets. 
+ False returns the UTC offset + Specific for pytz timezones only + + Returns + ------- + tuple (UTC times of DST transitions, UTC offsets in microseconds corresponding to DST transitions, string of type of transitions) @@ -221,7 +250,7 @@ cdef object get_dst_info(object tz): np.array([num], dtype=np.int64), None) - if cache_key not in dst_cache: + if cache_key not in dst_cache or dst: if treat_tz_as_pytz(tz): trans = np.array(tz._utc_transition_times, dtype='M8[ns]') trans = trans.view('i8') @@ -230,7 +259,7 @@ cdef object get_dst_info(object tz): trans[0] = NPY_NAT + 1 except Exception: pass - deltas = unbox_utcoffsets(tz._transition_info) + deltas = unbox_utcoffsets(tz._transition_info, dst) typ = 'pytz' elif treat_tz_as_dateutil(tz): @@ -273,11 +302,50 @@ cdef object get_dst_info(object tz): deltas = np.array([num], dtype=np.int64) typ = 'static' + if dst: + return trans, deltas, typ dst_cache[cache_key] = (trans, deltas, typ) return dst_cache[cache_key] +def is_dst(int64_t[:] values, object tz): + """ + Return a boolean array indicating whether each epoch timestamp is in + daylight savings time with respect to the passed timezone. 
+ + Parameters + ---------- + values : ndarray + i8 representation of the datetimes + tz : object + timezone + + Returns + ------- + ndarray of booleans + True indicates daylight savings time + """ + cdef: + Py_ssize_t n = len(values) + object typ + + result = np.zeros(n, dtype=bool) + if tz is None: + return result + transitions, offsets, typ = get_dst_info(tz, True) + offsets = np.array(offsets) + + # Fixed timezone offsets do not have DST transitions + if typ not in {'pytz', 'dateutil'}: + return result + positions = transitions.searchsorted(values, side='right') - 1 + + # DST has nonzero offset + result = offsets[positions] != 0 + return result + + def infer_tzinfo(start, end): if start is not None and end is not None: tz = start.tzinfo diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 3f8c07fe7cd21..c6ef015e1f54a 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -284,7 +284,7 @@ def _ensure_localized(self, result): if getattr(self, 'tz', None) is not None: if not isinstance(result, ABCIndexClass): result = self._simple_new(result) - result = result.tz_localize(self.tz) + result = result.tz_localize(self.tz, ambiguous=self.is_dst()) return result def _box_values_as_index(self): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 019aad4941d26..50dee697b39fb 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -266,7 +266,7 @@ def _add_comparison_methods(cls): _datetimelike_methods = ['to_period', 'tz_localize', 'tz_convert', 'normalize', 'strftime', 'round', 'floor', - 'ceil', 'month_name', 'day_name'] + 'ceil', 'month_name', 'day_name', 'is_dst'] _is_numeric_dtype = False _infer_as_myclass = True @@ -443,6 +443,36 @@ def tz(self, value): raise AttributeError("Cannot directly set timezone. 
Use tz_localize() " "or tz_convert() as appropriate") + def is_dst(self): + """ + Returns an Index of booleans indicating if each corresponding timestamp + is in daylight savings time. + + If the DatetimeIndex does not have a timezone, returns an Index + whose values are all False. + + Returns + ------- + Index + True if the timestamp is in daylight savings time else False + + Example + ------- + >>> dti = pd.date_range('2018-11-04', periods=4, freq='H', + tz='US/Pacific') + + >>> dti + DatetimeIndex(['2018-11-04 00:00:00-07:00', + '2018-11-04 01:00:00-07:00', + '2018-11-04 01:00:00-08:00', + '2018-11-04 02:00:00-08:00'], + dtype='datetime64[ns, US/Pacific]', freq='H') + + >>> dti.is_dst() + Index([True, True, False, False], dtype='object') + """ + return Index(timezones.is_dst(self.asi8, self.tz)) + @property def size(self): # TODO: Remove this when we have a DatetimeTZArray diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 95531b2d7a7ae..2f5f4650ba948 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1012,6 +1012,22 @@ def test_iteration_preserves_nanoseconds(self, tz): for i, ts in enumerate(index): assert ts == index[i] + @pytest.mark.parametrize('arg, expected_arg', [ + [[], []], + [date_range('2018-11-04', periods=4, freq='H', tz='US/Pacific'), + [True, True, False, False]], + [date_range('2018-11-04', periods=4, freq='H'), + [False] * 4], + [date_range('2018-11-04', periods=4, freq='H', tz=pytz.FixedOffset(3)), + [False] * 4], + [[pd.NaT], [False]] + ]) + def test_is_dst(self, arg, expected_arg): + dti = DatetimeIndex(arg) + result = dti.is_dst() + expected = Index(expected_arg) + tm.assert_index_equal(result, expected) + class TestDateRange(object): """Tests for date_range with timezones""" diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 42df4511578f1..2ec5c2b78a04f 
100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -601,6 +601,30 @@ def test_merge_on_datetime64tz(self): assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' + def test_merge_datetime64tz_with_dst_transition(self): + # GH 18885 + df1 = pd.DataFrame(pd.date_range( + '2017-10-29 01:00', periods=4, freq='H', tz='Europe/Madrid'), + columns=['date']) + df1['value'] = 1 + df2 = pd.DataFrame([ + pd.to_datetime('2017-10-29 03:00:00'), + pd.to_datetime('2017-10-29 04:00:00'), + pd.to_datetime('2017-10-29 05:00:00') + ], + columns=['date']) + df2['date'] = df2['date'].dt.tz_localize('UTC').dt.tz_convert( + 'Europe/Madrid') + df2['value'] = 2 + result = pd.merge(df1, df2, how='outer', on='date') + expected = pd.DataFrame({ + 'date': pd.date_range( + '2017-10-29 01:00', periods=7, freq='H', tz='Europe/Madrid'), + 'value_x': [1] * 4 + [np.nan] * 3, + 'value_y': [np.nan] * 4 + [2] * 3 + }) + assert_frame_equal(result, expected) + def test_merge_non_unique_period_index(self): # GH #16871 index = pd.period_range('2016-01-01', periods=16, freq='M') diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index a6b217a37bd0c..495532e778131 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -330,3 +330,7 @@ def test_nat_arithmetic_td64_vector(box, assert_func): def test_nat_pinned_docstrings(): # GH17327 assert NaT.ctime.__doc__ == datetime.ctime.__doc__ + + +def test_is_dst(): + assert NaT.is_dst() is False diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 8cebfafeae82a..54977cc7bfbf8 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -307,3 +307,15 @@ def test_timestamp_timetz_equivalent_with_datetime_tz(self, expected = _datetime.timetz() assert result == expected + + 
@pytest.mark.parametrize('tz', ['US/Pacific', 'dateutil/US/Pacific']) + def test_timestamp_is_dst(self, tz): + ts_naive = Timestamp('2018-11-04') + assert ts_naive.is_dst() is False + + ts_aware = ts_naive.tz_localize(tz) + assert ts_aware.is_dst() is True + + # DST transition at 2am + ts_aware = Timestamp('2018-11-04 04:00').tz_localize(tz) + assert ts_aware.is_dst() is False diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 5b45c6003a005..9921d880ff78a 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -37,7 +37,8 @@ def test_dt_namespace_accessor(self): ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = ['to_period', 'to_pydatetime', 'tz_localize', 'tz_convert', 'normalize', 'strftime', 'round', - 'floor', 'ceil', 'day_name', 'month_name'] + 'floor', 'ceil', 'day_name', 'month_name', + 'is_dst'] ok_for_td = TimedeltaIndex._datetimelike_ops ok_for_td_methods = ['components', 'to_pytimedelta', 'total_seconds', 'round', 'floor', 'ceil'] @@ -95,42 +96,6 @@ def compare(s, name): expected = Series(exp_values, index=s.index, name='xxx') tm.assert_series_equal(result, expected) - # round - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00']), name='xxx') - result = s.dt.round('D') - expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02', - '2012-01-01']), name='xxx') - tm.assert_series_equal(result, expected) - - # round with tz - result = (s.dt.tz_localize('UTC') - .dt.tz_convert('US/Eastern') - .dt.round('D')) - exp_values = pd.to_datetime(['2012-01-01', '2012-01-01', - '2012-01-01']).tz_localize('US/Eastern') - expected = Series(exp_values, name='xxx') - tm.assert_series_equal(result, expected) - - # floor - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00']), name='xxx') - result = s.dt.floor('D') - expected = 
Series(pd.to_datetime(['2012-01-01', '2012-01-01', - '2012-01-01']), name='xxx') - tm.assert_series_equal(result, expected) - - # ceil - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00']), name='xxx') - result = s.dt.ceil('D') - expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02', - '2012-01-02']), name='xxx') - tm.assert_series_equal(result, expected) - # datetimeindex with tz s = Series(date_range('20130101', periods=5, tz='US/Eastern'), name='xxx') @@ -261,6 +226,45 @@ def get_dir(s): with pytest.raises(com.SettingWithCopyError): s.dt.hour[0] = 5 + @pytest.mark.parametrize('method, dates', [ + ['round', ['2012-01-02', '2012-01-02', '2012-01-01']], + ['floor', ['2012-01-01', '2012-01-01', '2012-01-01']], + ['ceil', ['2012-01-02', '2012-01-02', '2012-01-02']] + ]) + def test_dt_round(self, method, dates): + # round + s = Series(pd.to_datetime(['2012-01-01 13:00:00', + '2012-01-01 12:01:00', + '2012-01-01 08:00:00']), name='xxx') + result = getattr(s.dt, method)('D') + expected = Series(pd.to_datetime(dates), name='xxx') + tm.assert_series_equal(result, expected) + + def test_dt_round_tz(self): + s = Series(pd.to_datetime(['2012-01-01 13:00:00', + '2012-01-01 12:01:00', + '2012-01-01 08:00:00']), name='xxx') + + result = (s.dt.tz_localize('UTC') + .dt.tz_convert('US/Eastern') + .dt.round('D')) + exp_values = pd.to_datetime(['2012-01-01', '2012-01-01', + '2012-01-01']).tz_localize('US/Eastern') + expected = Series(exp_values, name='xxx') + tm.assert_series_equal(result, expected) + + # GH 18946 round near DST + df1 = pd.DataFrame([ + pd.to_datetime('2017-10-29 02:00:00+02:00', utc=True), + pd.to_datetime('2017-10-29 02:00:00+01:00', utc=True), + pd.to_datetime('2017-10-29 03:00:00+01:00', utc=True) + ], + columns=['date']) + df1['date'] = df1['date'].dt.tz_convert('Europe/Madrid') + result = df1.date.dt.floor('H') + expected = df1['date'] + tm.assert_series_equal(result, expected) + def 
test_dt_namespace_accessor_categorical(self): # GH 19468 dti = DatetimeIndex(['20171111', '20181212']).repeat(2)