From be2b6dd7ff099b8ea219518cb7c72c272864c1ee Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 30 Aug 2018 23:43:59 -0700 Subject: [PATCH 01/11] ENH: Add is_dst method to DatetimeIndex --- doc/source/api.rst | 2 ++ doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/datetimes.py | 34 ++++++++++++++++++- .../tests/indexes/datetimes/test_timezones.py | 11 ++++++ pandas/tests/series/test_datetime_values.py | 3 +- 6 files changed, 50 insertions(+), 3 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 77d37ec2a7b2e..1b299d03f13f6 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -582,6 +582,7 @@ These can be accessed like ``Series.dt.``. Series.dt.to_pydatetime Series.dt.tz_localize Series.dt.tz_convert + Series.dt.is_dst Series.dt.normalize Series.dt.strftime Series.dt.round @@ -1778,6 +1779,7 @@ Time-specific operations DatetimeIndex.snap DatetimeIndex.tz_convert DatetimeIndex.tz_localize + DatetimeIndex.is_dst DatetimeIndex.round DatetimeIndex.floor DatetimeIndex.ceil diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index be16806cb4d1e..3de38b2b3dcf9 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -184,6 +184,7 @@ Other Enhancements - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). +- :class:`DatetimeIndex` has gained a `is_dst` method. .. _whatsnew_0240.api_breaking: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 3f8c07fe7cd21..c6ef015e1f54a 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -284,7 +284,7 @@ def _ensure_localized(self, result): if getattr(self, 'tz', None) is not None: if not isinstance(result, ABCIndexClass): result = self._simple_new(result) - result = result.tz_localize(self.tz) + result = result.tz_localize(self.tz, ambiguous=self.is_dst()) return result def _box_values_as_index(self): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 019aad4941d26..2358b1f2fb33d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -266,7 +266,7 @@ def _add_comparison_methods(cls): _datetimelike_methods = ['to_period', 'tz_localize', 'tz_convert', 'normalize', 'strftime', 'round', 'floor', - 'ceil', 'month_name', 'day_name'] + 'ceil', 'month_name', 'day_name', 'is_dst'] _is_numeric_dtype = False _infer_as_myclass = True @@ -443,6 +443,38 @@ def tz(self, value): raise AttributeError("Cannot directly set timezone. Use tz_localize() " "or tz_convert() as appropriate") + def is_dst(self): + """ + Returns an Index of booleans indicating if each corresponding timestamp + is in daylight savings time. + + If the DatetimeIndex does not have a timezone, returns an Index + who's values are all False. + + Returns + ------- + Index + True if the timestamp is in daylight savings time else False + + Example + ------- + >>> dti = pd.date_range('2018-11-04', periods=4, freq='H', + tz='US/Pacific') + + >>> dti + DatetimeIndex(['2018-11-04 00:00:00-07:00', + '2018-11-04 01:00:00-07:00', + '2018-11-04 01:00:00-08:00', + '2018-11-04 02:00:00-08:00'], + dtype='datetime64[ns, US/Pacific]', freq='H') + + >>> dti.is_dst() + Index([True, True, False, False], dtype='object') + """ + if self.tz is None: + return Index([False] * len(self)) + return Index([bool(ts.dst()) for ts in self]) + @property def size(self): # TODO: Remove this when we have a DatetimeTZArray diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 95531b2d7a7ae..55eb0468d7401 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1012,6 +1012,17 @@ def test_iteration_preserves_nanoseconds(self, tz): for i, ts in enumerate(index): assert ts == index[i] + def test_is_dst(self): + dti = pd.date_range('2018-11-04', periods=4, freq='H', tz='US/Pacific') + result = dti.is_dst() + expected = Index([True, True, False, False]) + tm.assert_index_equal(result, expected) + + dti_naive = dti.tz_localize(None) + result = dti_naive.is_dst() + expected = Index([False] * 4) + tm.assert_index_equal(result, expected) + class TestDateRange(object): """Tests for date_range with timezones""" diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 5b45c6003a005..3b3fe2983d61a 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -37,7 +37,8 @@ def test_dt_namespace_accessor(self): ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = ['to_period', 'to_pydatetime', 'tz_localize', 'tz_convert', 'normalize', 'strftime', 'round', - 'floor', 'ceil', 'day_name', 'month_name'] + 'floor', 'ceil', 'day_name', 'month_name', + 'is_dst'] ok_for_td = TimedeltaIndex._datetimelike_ops ok_for_td_methods = ['components', 'to_pytimedelta', 'total_seconds', 'round', 'floor', 'ceil'] From 18f8611c34dcdc73138eb43fcbeb6b613f0fc58b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 31 Aug 2018 00:20:55 -0700 Subject: [PATCH 02/11] Address merge issue --- pandas/_libs/tslibs/nattype.pyx | 1 - pandas/tests/reshape/merge/test_merge.py | 24 +++++++ pandas/tests/series/test_datetime_values.py | 75 +++++++++++---------- 3 files changed, 63 insertions(+), 37 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 08d9128ff660c..c0263b747b081 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -353,7 +353,6 @@ class NaTType(_NaT): strptime = _make_error_func('strptime', datetime) strftime = _make_error_func('strftime', datetime) isocalendar = _make_error_func('isocalendar', datetime) - dst = _make_error_func('dst', datetime) ctime = _make_error_func('ctime', datetime) time = _make_error_func('time', datetime) toordinal = _make_error_func('toordinal', datetime) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 42df4511578f1..2ec5c2b78a04f 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -601,6 +601,30 @@ def test_merge_on_datetime64tz(self): assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' + def test_merge_datetime64tz_with_dst_transition(self): + # GH 18885 + df1 = pd.DataFrame(pd.date_range( + '2017-10-29 01:00', periods=4, freq='H', tz='Europe/Madrid'), + columns=['date']) + df1['value'] = 1 + df2 = pd.DataFrame([ + pd.to_datetime('2017-10-29 03:00:00'), + pd.to_datetime('2017-10-29 04:00:00'), + pd.to_datetime('2017-10-29 05:00:00') + ], + columns=['date']) + df2['date'] = df2['date'].dt.tz_localize('UTC').dt.tz_convert( + 'Europe/Madrid') + df2['value'] = 2 + result = pd.merge(df1, df2, how='outer', on='date') + expected = pd.DataFrame({ + 'date': pd.date_range( + '2017-10-29 01:00', periods=7, freq='H', tz='Europe/Madrid'), + 'value_x': [1] * 4 + [np.nan] * 3, + 'value_y': [np.nan] * 4 + [2] * 3 + }) + assert_frame_equal(result, expected) + def test_merge_non_unique_period_index(self): # GH #16871 index = pd.period_range('2016-01-01', periods=16, freq='M') diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 3b3fe2983d61a..9921d880ff78a 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -96,42 +96,6 @@ def compare(s, name): expected = Series(exp_values, index=s.index, name='xxx') tm.assert_series_equal(result, expected) - # round - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00']), name='xxx') - result = s.dt.round('D') - expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02', - '2012-01-01']), name='xxx') - tm.assert_series_equal(result, expected) - - # round with tz - result = (s.dt.tz_localize('UTC') - .dt.tz_convert('US/Eastern') - .dt.round('D')) - exp_values = pd.to_datetime(['2012-01-01', '2012-01-01', - '2012-01-01']).tz_localize('US/Eastern') - expected = Series(exp_values, name='xxx') - tm.assert_series_equal(result, expected) - - # floor - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00']), name='xxx') - result = s.dt.floor('D') - expected = Series(pd.to_datetime(['2012-01-01', '2012-01-01', - '2012-01-01']), name='xxx') - tm.assert_series_equal(result, expected) - - # ceil - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00']), name='xxx') - result = s.dt.ceil('D') - expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02', - '2012-01-02']), name='xxx') - tm.assert_series_equal(result, expected) - # datetimeindex with tz s = Series(date_range('20130101', periods=5, tz='US/Eastern'), name='xxx') @@ -262,6 +226,45 @@ def get_dir(s): with pytest.raises(com.SettingWithCopyError): s.dt.hour[0] = 5 + @pytest.mark.parametrize('method, dates', [ + ['round', ['2012-01-02', '2012-01-02', '2012-01-01']], + ['floor', ['2012-01-01', '2012-01-01', '2012-01-01']], + ['ceil', ['2012-01-02', '2012-01-02', '2012-01-02']] + ]) + def test_dt_round(self, method, dates): + # round + s = Series(pd.to_datetime(['2012-01-01 13:00:00', + '2012-01-01 12:01:00', + '2012-01-01 08:00:00']), name='xxx') + result = getattr(s.dt, method)('D') + expected = Series(pd.to_datetime(dates), name='xxx') + tm.assert_series_equal(result, expected) + + def test_dt_round_tz(self): + s = Series(pd.to_datetime(['2012-01-01 13:00:00', + '2012-01-01 12:01:00', + '2012-01-01 08:00:00']), name='xxx') + + result = (s.dt.tz_localize('UTC') + .dt.tz_convert('US/Eastern') + .dt.round('D')) + exp_values = pd.to_datetime(['2012-01-01', '2012-01-01', + '2012-01-01']).tz_localize('US/Eastern') + expected = Series(exp_values, name='xxx') + tm.assert_series_equal(result, expected) + + # GH 18946 round near DST + df1 = pd.DataFrame([ + pd.to_datetime('2017-10-29 02:00:00+02:00', utc=True), + pd.to_datetime('2017-10-29 02:00:00+01:00', utc=True), + pd.to_datetime('2017-10-29 03:00:00+01:00', utc=True) + ], + columns=['date']) + df1['date'] = df1['date'].dt.tz_convert('Europe/Madrid') + result = df1.date.dt.floor('H') + expected = df1['date'] + tm.assert_series_equal(result, expected) + def test_dt_namespace_accessor_categorical(self): # GH 19468 dti = DatetimeIndex(['20171111', '20181212']).repeat(2) From 442f8882528f248113dea85a4e1a3102d7b0a41a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 31 Aug 2018 00:28:24 -0700 Subject: [PATCH 03/11] Add test for Nat --- doc/source/whatsnew/v0.24.0.txt | 4 +++- pandas/tests/indexes/datetimes/test_timezones.py | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3de38b2b3dcf9..80e583f86e19d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -184,7 +184,7 @@ Other Enhancements - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). -- :class:`DatetimeIndex` has gained a `is_dst` method. +- :class:`DatetimeIndex` has gained a ``is_dst`` method (:issue:`18885`, :issue:`18946`) .. _whatsnew_0240.api_breaking: @@ -616,6 +616,8 @@ Timezones - Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`) - Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) - Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`) +- Bug in :meth:`DatetimeIndex.floor` that raised an ``AmbiguousTimeError`` during a DST transition (:issue:`18946`) +- Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`) Offsets ^^^^^^^ diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 55eb0468d7401..6af7a2362093c 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1023,6 +1023,11 @@ def test_is_dst(self): expected = Index([False] * 4) tm.assert_index_equal(result, expected) + dti_nat = pd.DatetimeIndex([pd.NaT]) + result = dti_nat.is_dst() + expected = Index([False]) + tm.assert_index_equal(result, expected) + class TestDateRange(object): """Tests for date_range with timezones""" From 254b5a2b36b35bad4c744868157663598bc41153 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 31 Aug 2018 14:28:28 -0700 Subject: [PATCH 04/11] Add is_dst for timestamps --- doc/source/api.rst | 1 + doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/_libs/tslibs/timestamps.pyx | 14 ++++++++++++++ pandas/tests/scalar/test_nat.py | 4 ++++ pandas/tests/scalar/timestamp/test_timezones.py | 11 +++++++++++ 5 files changed, 31 insertions(+), 1 deletion(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 1b299d03f13f6..ded4e1e3c55e9 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1987,6 +1987,7 @@ Methods Timestamp.isocalendar Timestamp.isoformat Timestamp.isoweekday + Timestamp.is_dst Timestamp.month_name Timestamp.normalize Timestamp.now diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 80e583f86e19d..e2109618b5ccd 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -184,7 +184,7 @@ Other Enhancements - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). -- :class:`DatetimeIndex` has gained a ``is_dst`` method (:issue:`18885`, :issue:`18946`) +- :class:`DatetimeIndex` and :class:`Timestamp` have gained a ``is_dst`` method (:issue:`18885`, :issue:`18946`) .. _whatsnew_0240.api_breaking: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3ab1396c0fe38..c06288f6cf23c 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -722,6 +722,20 @@ class Timestamp(_Timestamp): raise AttributeError("Cannot directly set timezone. Use tz_localize() " "or tz_convert() as appropriate") + def is_dst(self): + """ + Returns a boolean indicating if the Timestamp is in daylight savings + time. Naive timestamps are considered not to be in daylight savings + time. + + Returns + ------- + Boolean + True if the Timestamp is in daylight savings time + False if the Timestamp is naive or not in daylight savings time + """ + return bool(self.dst()) + def __setstate__(self, state): self.value = state[0] self.freq = state[1] diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index a6b217a37bd0c..495532e778131 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -330,3 +330,7 @@ def test_nat_arithmetic_td64_vector(box, assert_func): def test_nat_pinned_docstrings(): # GH17327 assert NaT.ctime.__doc__ == datetime.ctime.__doc__ + + +def test_is_dst(): + assert NaT.is_dst() is False diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 8cebfafeae82a..7e7d1ee60856c 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -307,3 +307,14 @@ def test_timestamp_timetz_equivalent_with_datetime_tz(self, expected = _datetime.timetz() assert result == expected + + def test_timestamp_is_dst(self): + ts_naive = Timestamp('2018-11-04') + assert ts_naive.is_dst() is False + + ts_aware = ts_naive.tz_localize('US/Pacific') + assert ts_aware.is_dst() is True + + # DST transition at 2am + ts_aware = Timestamp('2018-11-04 04:00').tz_localize('US/Pacific') + assert ts_aware.is_dst() is False From 7c73d0a6ea5287130f114aaee5039d244c5d1013 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 31 Aug 2018 17:01:07 -0700 Subject: [PATCH 05/11] Add cython version of dst --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e2109618b5ccd..8c7d5636247bb 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -184,7 +184,7 @@ Other Enhancements - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). -- :class:`DatetimeIndex` and :class:`Timestamp` have gained a ``is_dst`` method (:issue:`18885`, :issue:`18946`) +- :class:`DatetimeIndex` and :class:`Timestamp` have gained an ``is_dst`` method (:issue:`18885`, :issue:`18946`) .. _whatsnew_0240.api_breaking: From b0938d5e92a97da93e3d7e0210b6775ed7292160 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 31 Aug 2018 18:25:54 -0700 Subject: [PATCH 06/11] Add cython version of is_dst --- pandas/_libs/tslib.pyx | 2 +- pandas/_libs/tslibs/conversion.pyx | 13 +++-- pandas/_libs/tslibs/period.pyx | 2 +- pandas/_libs/tslibs/resolution.pyx | 2 +- pandas/_libs/tslibs/timezones.pxd | 2 +- pandas/_libs/tslibs/timezones.pyx | 76 +++++++++++++++++++++++++++--- pandas/core/indexes/datetimes.py | 4 +- 7 files changed, 81 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index bdd279b19208b..fc53b88fd4cfe 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -146,7 +146,7 @@ def ints_to_pydatetime(int64_t[:] arr, tz=None, freq=None, box="datetime"): dt64_to_dtstruct(local_value, &dts) result[i] = func_create(value, dts, tz, freq) else: - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if typ not in ['pytz', 'dateutil']: # static/fixed; in this case we know that len(delta) == 1 diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fe664cf03b0b9..14939427c2da3 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -28,11 +28,10 @@ from np_datetime import OutOfBoundsDatetime from util cimport (is_string_object, is_datetime64_object, - is_integer_object, is_float_object, is_array) + is_integer_object, is_float_object) from timedeltas cimport cast_from_unit from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, - treat_tz_as_dateutil, treat_tz_as_pytz, get_utcoffset, get_dst_info, get_timezone, maybe_get_tz, tz_compare) from parsing import parse_datetime_string @@ -540,7 +539,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz): dt64_to_dtstruct(local_val, &obj.dts) else: # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if is_fixed_offset(tz): # static/fixed tzinfo; in this case we know len(deltas) == 1 @@ -636,7 +635,7 @@ cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz, int64_t[:] deltas int64_t v - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if not to_utc: # We add `offset` below instead of subtracting it deltas = -1 * np.array(deltas, dtype='i8') @@ -888,7 +887,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, "the same size as vals") ambiguous_array = np.asarray(ambiguous) - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) tdata = cnp.PyArray_DATA(trans) ntrans = len(trans) @@ -1150,7 +1149,7 @@ cdef int64_t[:] _normalize_local(int64_t[:] stamps, object tz): result[i] = _normalized_stamp(&dts) else: # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if typ not in ['pytz', 'dateutil']: # static/fixed; in this case we know that len(delta) == 1 @@ -1227,7 +1226,7 @@ def is_date_array_normalized(int64_t[:] stamps, tz=None): if (dts.hour + dts.min + dts.sec + dts.us) > 0: return False else: - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if typ not in ['pytz', 'dateutil']: # static/fixed; in this case we know that len(delta) == 1 diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index f68b6d8fdef57..6c29fd0280b02 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1516,7 +1516,7 @@ cdef int64_t[:] localize_dt64arr_to_period(int64_t[:] stamps, result[i] = get_period_ordinal(&dts, freq) else: # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if typ not in ['pytz', 'dateutil']: # static/fixed; in this case we know that len(delta) == 1 diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 4e3350395400c..7ec3c0062c9e3 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -68,7 +68,7 @@ cdef _reso_local(int64_t[:] stamps, object tz): reso = curr_reso else: # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz, False) if typ not in ['pytz', 'dateutil']: # static/fixed; in this case we know that len(delta) == 1 diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 8965b46f747c4..94eeaccf6284f 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -13,4 +13,4 @@ cpdef object maybe_get_tz(object tz) cdef get_utcoffset(tzinfo, obj) cdef bint is_fixed_offset(object tz) -cdef object get_dst_info(object tz) +cdef object get_dst_info(object tz, dst) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 36ec499c7335c..b200c6302ff38 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -186,16 +186,28 @@ cdef object get_utc_trans_times_from_dateutil_tz(object tz): return new_trans -cdef int64_t[:] unbox_utcoffsets(object transinfo): +cdef int64_t[:] unbox_utcoffsets(object transinfo, dst): + """ + Unpack the offset information from pytz timezone objects + + Parameters + ---------- + transinfo : list of tuples + Each tuple contains (UTC offset, DST offset, tz abbreviation) + dst : boolean + True returns an array of the DST offsets + False returns an array of UTC offsets + """ cdef: Py_ssize_t i, sz int64_t[:] arr + int key sz = len(transinfo) arr = np.empty(sz, dtype='i8') - + key = int(dst) for i in range(sz): - arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000 + arr[i] = int(transinfo[i][key].total_seconds()) * 1000000000 return arr @@ -204,9 +216,22 @@ cdef int64_t[:] unbox_utcoffsets(object transinfo): # Daylight Savings -cdef object get_dst_info(object tz): +cdef object get_dst_info(object tz, dst): """ - return a tuple of : + Return DST info from a timezone + + Parameters + ---------- + tz : object + timezone + dst : bool + True returns the DST specific offset + False returns the UTC offset + Specific for pytz timezones only + + Returns + ------- + tuple (UTC times of DST transitions, UTC offsets in microseconds corresponding to DST transitions, string of type of transitions) @@ -230,7 +255,7 @@ cdef object get_dst_info(object tz): trans[0] = NPY_NAT + 1 except Exception: pass - deltas = unbox_utcoffsets(tz._transition_info) + deltas = unbox_utcoffsets(tz._transition_info, dst) typ = 'pytz' elif treat_tz_as_dateutil(tz): @@ -278,6 +303,45 @@ cdef object get_dst_info(object tz): return dst_cache[cache_key] +def _is_dst(int64_t[:] values, object tz): + """ + Return a boolean array where True indicates a value that lies in + daylight savings time and False indicates a value that does not lie in + daylight savings time + + Parameters + ---------- + values : ndarray + i8 representation of the datetimes + tz : object + timezone + + Returns + ------- + ndarray + Booleans + """ + cdef: + Py_ssize_t n = len(values) + # Cython boolean memoryviews are not supported yet + # https://github.com/cython/cython/issues/2204 + # bint[:] result + object typ + + result = np.zeros(n, dtype=bool) + if tz is None: + return result + transitions, offsets, typ = get_dst_info(tz, True) + offsets = np.array(offsets) + # Fixed timezone offsets do not have DST transitions + if typ not in {'pytz', 'dateutil'}: + return result + positions = transitions.searchsorted(values, side='right') + # DST has 0 offset + result = offsets[positions] == 0 + return result + + def infer_tzinfo(start, end): if start is not None and end is not None: tz = start.tzinfo diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 2358b1f2fb33d..e392d6222f975 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -471,9 +471,7 @@ def is_dst(self): >>> dti.is_dst() Index([True, True, False, False], dtype='object') """ - if self.tz is None: - return Index([False] * len(self)) - return Index([bool(ts.dst()) for ts in self]) + return Index(timezones._is_dst(self.asi8, self.tz)) @property def size(self): From 641e29546b9d2f87a9bd433315dc59ae499021d1 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 31 Aug 2018 18:52:29 -0700 Subject: [PATCH 07/11] Add cython version of is_dst pt2 --- pandas/_libs/tslibs/timezones.pyx | 9 ++++----- pandas/tests/indexes/datetimes/test_timezones.py | 7 ++++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index b200c6302ff38..f2f11898d042e 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -305,9 +305,8 @@ cdef object get_dst_info(object tz, dst): def _is_dst(int64_t[:] values, object tz): """ - Return a boolean array where True indicates a value that lies in - daylight savings time and False indicates a value that does not lie in - daylight savings time + Return a boolean array indicating whether each epoch timestamp is in + daylight savings time with respect with the passed timezone. Parameters ---------- @@ -318,8 +317,8 @@ def _is_dst(int64_t[:] values, object tz): Returns ------- - ndarray - Booleans + ndarray of booleans + True indicates daylight savings time """ cdef: Py_ssize_t n = len(values) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 6af7a2362093c..179628f94fd12 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1013,7 +1013,12 @@ def test_iteration_preserves_nanoseconds(self, tz): assert ts == index[i] def test_is_dst(self): - dti = pd.date_range('2018-11-04', periods=4, freq='H', tz='US/Pacific') + dti = DatetimeIndex([]) + result = dti.is_dst() + expected = Index([]) + tm.assert_index_equal(result, expected) + + dti = date_range('2018-11-04', periods=4, freq='H', tz='US/Pacific') result = dti.is_dst() expected = Index([True, True, False, False]) tm.assert_index_equal(result, expected) From 820df35fba0948985aa501e11cbb8b8965ad17e8 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 31 Aug 2018 19:01:18 -0700 Subject: [PATCH 08/11] Add fixedoffset test --- pandas/tests/indexes/datetimes/test_timezones.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 179628f94fd12..ee4cfb0f90719 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1028,6 +1028,11 @@ def test_is_dst(self): expected = Index([False] * 4) tm.assert_index_equal(result, expected) + dti_fixed = dti.tz_localize(pytz.FixedOffset(300)) + result = dti_fixed.is_dst() + expected = Index([False] * 4) + tm.assert_index_equal(result, expected) + dti_nat = pd.DatetimeIndex([pd.NaT]) result = dti_nat.is_dst() expected = Index([False]) From 9304b50eff22e9d243baaa36310fc2829a2ab570 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 3 Sep 2018 17:22:58 -0700 Subject: [PATCH 09/11] address review and test failures --- pandas/_libs/tslibs/nattype.pyx | 15 ++++++++ pandas/_libs/tslibs/timezones.pyx | 21 +++++------ pandas/core/indexes/datetimes.py | 2 +- .../tests/indexes/datetimes/test_timezones.py | 36 +++++++------------ 4 files changed, 40 insertions(+), 34 deletions(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index c0263b747b081..e05572f66525d 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -260,6 +260,20 @@ class NaTType(_NaT): def is_year_end(self): return False + def is_dst(self): + """ + Returns a boolean indicating if the Timestamp is in daylight savings + time. Naive timestamps are considered not to be in daylight savings + time. + + Returns + ------- + Boolean + True if the Timestamp is in daylight savings time + False if the Timestamp is naive or not in daylight savings time + """ + return False + def __rdiv__(self, other): return _nat_rdivide_op(self, other) @@ -353,6 +367,7 @@ class NaTType(_NaT): strptime = _make_error_func('strptime', datetime) strftime = _make_error_func('strftime', datetime) isocalendar = _make_error_func('isocalendar', datetime) + dst = _make_error_func('dst', datetime) ctime = _make_error_func('ctime', datetime) time = _make_error_func('time', datetime) toordinal = _make_error_func('toordinal', datetime) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index f2f11898d042e..7c1fbf52553dc 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -108,7 +108,8 @@ def _p_tz_cache_key(tz): return tz_cache_key(tz) -# Timezone data caches, key is the pytz string or dateutil file name. +# Timezone data (UTC offset) caches +# key is the pytz string or dateutil file name. dst_cache = {} @@ -225,7 +226,8 @@ cdef object get_dst_info(object tz, dst): tz : object timezone dst : bool - True returns the DST specific offset + True returns the DST specific offset and will NOT store the results in + dst_cache. dst_cache is reserved for caching UTC offsets. False returns the UTC offset Specific for pytz timezones only @@ -246,7 +248,7 @@ cdef object get_dst_info(object tz, dst): np.array([num], dtype=np.int64), None) - if cache_key not in dst_cache: + if cache_key not in dst_cache or dst: if treat_tz_as_pytz(tz): trans = np.array(tz._utc_transition_times, dtype='M8[ns]') trans = trans.view('i8') @@ -298,12 +300,14 @@ cdef object get_dst_info(object tz, dst): deltas = np.array([num], dtype=np.int64) typ = 'static' + if dst: + return trans, deltas, typ dst_cache[cache_key] = (trans, deltas, typ) return dst_cache[cache_key] -def _is_dst(int64_t[:] values, object tz): +def is_dst(int64_t[:] values, object tz): """ Return a boolean array indicating whether each epoch timestamp is in daylight savings time with respect with the passed timezone. @@ -322,9 +326,6 @@ def _is_dst(int64_t[:] values, object tz): """ cdef: Py_ssize_t n = len(values) - # Cython boolean memoryviews are not supported yet - # https://github.com/cython/cython/issues/2204 - # bint[:] result object typ result = np.zeros(n, dtype=bool) @@ -335,9 +336,9 @@ def _is_dst(int64_t[:] values, object tz): # Fixed timezone offsets do not have DST transitions if typ not in {'pytz', 'dateutil'}: return result - positions = transitions.searchsorted(values, side='right') - # DST has 0 offset - result = offsets[positions] == 0 + positions = transitions.searchsorted(values, side='right') - 1 + # DST has nonzero offset + result = offsets[positions] != 0 return result diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e392d6222f975..50dee697b39fb 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -471,7 +471,7 @@ def is_dst(self): >>> dti.is_dst() Index([True, True, False, False], dtype='object') """ - return Index(timezones._is_dst(self.asi8, self.tz)) + return Index(timezones.is_dst(self.asi8, self.tz)) @property def size(self): diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index ee4cfb0f90719..2f5f4650ba948 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1012,30 +1012,20 @@ def test_iteration_preserves_nanoseconds(self, tz): for i, ts in enumerate(index): assert ts == index[i] - def test_is_dst(self): - dti = DatetimeIndex([]) - result = dti.is_dst() - expected = Index([]) - tm.assert_index_equal(result, expected) - - dti = date_range('2018-11-04', periods=4, freq='H', tz='US/Pacific') + @pytest.mark.parametrize('arg, expected_arg', [ + [[], []], + [date_range('2018-11-04', periods=4, freq='H', tz='US/Pacific'), + [True, True, False, False]], + [date_range('2018-11-04', periods=4, freq='H'), + [False] * 4], + [date_range('2018-11-04', periods=4, freq='H', tz=pytz.FixedOffset(3)), + [False] * 4], + [[pd.NaT], [False]] + ]) + def test_is_dst(self, arg, expected_arg): + dti = DatetimeIndex(arg) result = dti.is_dst() - expected = Index([True, True, False, False]) - tm.assert_index_equal(result, expected) - - dti_naive = dti.tz_localize(None) - result = dti_naive.is_dst() - expected = Index([False] * 4) - tm.assert_index_equal(result, expected) - - dti_fixed = dti.tz_localize(pytz.FixedOffset(300)) - result = dti_fixed.is_dst() - expected = Index([False] * 4) - tm.assert_index_equal(result, expected) - - dti_nat = pd.DatetimeIndex([pd.NaT]) - result = dti_nat.is_dst() - expected = Index([False]) + expected = Index(expected_arg) tm.assert_index_equal(result, expected) From 627bb19150fe2bdb0533f236476746c3e4426215 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 4 Sep 2018 19:11:58 -0700 Subject: [PATCH 10/11] type dst and document --- pandas/_libs/tslibs/timezones.pxd | 2 +- pandas/_libs/tslibs/timezones.pyx | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 94eeaccf6284f..638bd0e79c806 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -13,4 +13,4 @@ cpdef object maybe_get_tz(object tz) cdef get_utcoffset(tzinfo, obj) cdef bint is_fixed_offset(object tz) -cdef object get_dst_info(object tz, dst) +cdef object get_dst_info(object tz, bint dst) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 7c1fbf52553dc..e32cab36a4e82 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -187,9 +187,10 @@ cdef object get_utc_trans_times_from_dateutil_tz(object tz): return new_trans -cdef int64_t[:] unbox_utcoffsets(object transinfo, dst): +cdef int64_t[:] unbox_utcoffsets(object transinfo, bint dst): """ - Unpack the offset information from pytz timezone objects + Unpack the offset information from the _transition_info attribute of pytz + timezones Parameters ---------- @@ -206,9 +207,10 @@ cdef int64_t[:] unbox_utcoffsets(object transinfo, dst): sz = len(transinfo) arr = np.empty(sz, dtype='i8') - key = int(dst) for i in range(sz): - arr[i] = int(transinfo[i][key].total_seconds()) * 1000000000 + # If dst == True, extract the DST shift in nanoseconds + # If dst == False, extract the UTC offset in nanoseconds + arr[i] = int(transinfo[i][dst].total_seconds()) * 1000000000 return arr @@ -217,15 +219,15 @@ cdef int64_t[:] unbox_utcoffsets(object transinfo, dst): # Daylight Savings -cdef object get_dst_info(object tz, dst): +cdef object get_dst_info(object tz, bint dst): """ Return DST info from a timezone Parameters ---------- tz : object - timezone - dst : bool + timezone object + dst : bint True returns the DST specific offset and will NOT store the results in dst_cache. dst_cache is reserved for caching UTC offsets. False returns the UTC offset @@ -333,10 +335,12 @@ def is_dst(int64_t[:] values, object tz): return result transitions, offsets, typ = get_dst_info(tz, True) offsets = np.array(offsets) + # Fixed timezone offsets do not have DST transitions if typ not in {'pytz', 'dateutil'}: return result positions = transitions.searchsorted(values, side='right') - 1 + # DST has nonzero offset result = offsets[positions] != 0 return result From c03792e22c10a1affdfde6c119c728b4451f1434 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 4 Sep 2018 19:20:11 -0700 Subject: [PATCH 11/11] Add dateutil test for scalars --- pandas/tests/scalar/timestamp/test_timezones.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 7e7d1ee60856c..54977cc7bfbf8 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -308,13 +308,14 @@ def test_timestamp_timetz_equivalent_with_datetime_tz(self, assert result == expected - def test_timestamp_is_dst(self): + @pytest.mark.parametrize('tz', ['US/Pacific', 'dateutil/US/Pacific']) + def test_timestamp_is_dst(self, tz): ts_naive = Timestamp('2018-11-04') assert ts_naive.is_dst() is False - ts_aware = ts_naive.tz_localize('US/Pacific') + ts_aware = ts_naive.tz_localize(tz) assert ts_aware.is_dst() is True # DST transition at 2am - ts_aware = Timestamp('2018-11-04 04:00').tz_localize('US/Pacific') + ts_aware = Timestamp('2018-11-04 04:00').tz_localize(tz) assert ts_aware.is_dst() is False