From 7d6528af4ac4124fa0bf7848ef8c9474bc77c418 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 10 Aug 2018 22:35:31 -0700 Subject: [PATCH 01/29] Add CalendarOffset --- pandas/tseries/offsets.py | 51 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index c74b7454a67e3..162220dd911f1 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -41,7 +41,7 @@ 'LastWeekOfMonth', 'FY5253Quarter', 'FY5253', 'Week', 'WeekOfMonth', 'Easter', 'Hour', 'Minute', 'Second', 'Milli', 'Micro', 'Nano', - 'DateOffset'] + 'DateOffset', 'CalendarDay'] # convert to/from datetime/timestamp to allow invalid Timestamp ranges to # pass thru @@ -2123,6 +2123,54 @@ def onOffset(self, dt): return False return date(dt.year, dt.month, dt.day) == easter(dt.year) + +class CalendarDay(SingleConstructorOffset): + """ + Calendar day offset. Respects calendar arithmetic as opposed to Day which + respects absolute time. + """ + _adjust_dst = True + _inc = timedelta(days=1) + _prefix = 'CD' + _attributes = frozenset(['n', 'normalize']) + + def __init__(self, n=1, normalize=False): + BaseOffset.__init__(self, n, normalize) + + @apply_wraps + def apply(self, other): + """ + Apply the CalendarDay offset to a datetime object. Incoming datetime + objects can be tz-aware or naive. + """ + if type(other) == type(self): + # Add other CalendarDays + return type(self)(self.n + other.n, normalize=self.normalize) + tzinfo = getattr(other, 'tzinfo', None) + if tzinfo is not None: + other = other.replace(tzinfo=None) + + other = other + self.n * self._inc + + if tzinfo is not None: + # This can raise a AmbiguousTimeError or NonExistentTimeError + other = conversion.localize_pydatetime(other, tzinfo) + + try: + return as_timestamp(other) + except TypeError: + raise TypeError("Cannot perform arithmetic between {other} and " + "{offset}".format(other=type(other), offset=self)) + + @apply_index_wraps + def apply_index(self, i): + """ + Apply the CalendarDay offset to a DatetimeIndex. Incoming DatetimeIndex + objects are assumed to be tz_naive + """ + return i + self.n * self._inc + + # --------------------------------------------------------------------- # Ticks @@ -2406,4 +2454,5 @@ def generate_range(start=None, end=None, periods=None, WeekOfMonth, # 'WOM' FY5253, FY5253Quarter, + CalendarDay # 'CD' ]} From 96f734886f2744755a31aa97a6e24d93cc26f39e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 10 Aug 2018 23:19:21 -0700 Subject: [PATCH 02/29] remove calendar day references, add arithmetic tests --- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- pandas/tests/tseries/offsets/test_offsets.py | 47 ++++++++++++++++++-- 5 files changed, 48 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 019aad4941d26..324f98052d2bf 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1580,7 +1580,7 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None, Right bound for generating dates. periods : integer, optional Number of periods to generate. - freq : str or DateOffset, default 'D' (calendar daily) + freq : str or DateOffset, default 'D' Frequency strings can have multiples, e.g. '5H'. See :ref:`here ` for a list of frequency aliases. diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 364eea8fb8a3a..4b125580bd7e0 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1052,7 +1052,7 @@ def interval_range(start=None, end=None, periods=None, freq=None, freq : numeric, string, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 - for numeric and 'D' (calendar daily) for datetime-like. + for numeric and 'D' for datetime-like. name : string, default None Name of the resulting IntervalIndex closed : {'left', 'right', 'both', 'neither'}, default 'right' diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 32aa89010b206..3a68c6c26a974 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -840,7 +840,7 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None): Right bound for generating periods periods : integer, default None Number of periods to generate - freq : string or DateOffset, default 'D' (calendar daily) + freq : string or DateOffset, default 'D' Frequency alias name : string, default None Name of the resulting PeriodIndex diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 9f14d4cfd5863..063b578e512de 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -737,7 +737,7 @@ def timedelta_range(start=None, end=None, periods=None, freq=None, Right bound for generating timedeltas periods : integer, default None Number of periods to generate - freq : string or DateOffset, default 'D' (calendar daily) + freq : string or DateOffset, default 'D' Frequency strings can have multiples, e.g. '5H' name : string, default None Name of the resulting TimedeltaIndex diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index e95f1ba11ad5c..af5756b69e696 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -2,6 +2,7 @@ from datetime import date, datetime, timedelta import pytest +import pytz from pandas.compat import range from pandas import compat @@ -28,7 +29,7 @@ YearEnd, Day, QuarterEnd, BusinessMonthEnd, FY5253, Nano, Easter, FY5253Quarter, - LastWeekOfMonth, Tick) + LastWeekOfMonth, Tick, CalendarDay) import pandas.tseries.offsets as offsets from pandas.io.pickle import read_pickle from pandas._libs.tslibs import timezones @@ -192,6 +193,7 @@ class TestCommon(Base): # are applied to 2011/01/01 09:00 (Saturday) # used for .apply and .rollforward expecteds = {'Day': Timestamp('2011-01-02 09:00:00'), + 'CalendarDay': Timestamp('2011-01-02 09:00:00'), 'DateOffset': Timestamp('2011-01-02 09:00:00'), 'BusinessDay': Timestamp('2011-01-03 09:00:00'), 'CustomBusinessDay': Timestamp('2011-01-03 09:00:00'), @@ -360,7 +362,7 @@ def test_rollforward(self, offset_types): # result will not be changed if the target is on the offset no_changes = ['Day', 'MonthBegin', 'SemiMonthBegin', 'YearBegin', 'Week', 'Hour', 'Minute', 'Second', 'Milli', 'Micro', - 'Nano', 'DateOffset'] + 'Nano', 'DateOffset', 'CalendarDay'] for n in no_changes: expecteds[n] = Timestamp('2011/01/01 09:00') @@ -373,6 +375,7 @@ def test_rollforward(self, offset_types): norm_expected[k] = Timestamp(norm_expected[k].date()) normalized = {'Day': Timestamp('2011-01-02 00:00:00'), + 'CalendarDay': Timestamp('2011-01-02 00:00:00'), 'DateOffset': Timestamp('2011-01-02 00:00:00'), 'MonthBegin': Timestamp('2011-02-01 00:00:00'), 'SemiMonthBegin': Timestamp('2011-01-15 00:00:00'), @@ -425,7 +428,7 @@ def test_rollback(self, offset_types): # result will not be changed if the target is on the offset for n in ['Day', 'MonthBegin', 'SemiMonthBegin', 'YearBegin', 'Week', 'Hour', 'Minute', 'Second', 'Milli', 'Micro', 'Nano', - 'DateOffset']: + 'DateOffset', 'CalendarDay']: expecteds[n] = Timestamp('2011/01/01 09:00') # but be changed when normalize=True @@ -434,6 +437,7 @@ def test_rollback(self, offset_types): norm_expected[k] = Timestamp(norm_expected[k].date()) normalized = {'Day': Timestamp('2010-12-31 00:00:00'), + 'CalendarDay': Timestamp('2010-12-31 00:00:00'), 'DateOffset': Timestamp('2010-12-31 00:00:00'), 'MonthBegin': Timestamp('2010-12-01 00:00:00'), 'SemiMonthBegin': Timestamp('2010-12-15 00:00:00'), @@ -3174,3 +3178,40 @@ def test_last_week_of_month_on_offset(): slow = (ts + offset) - offset == ts fast = offset.onOffset(ts) assert fast == slow + + +def test_CalendarDay_with_timezone(): + # GH 22274 + ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki') + result = ts + CalendarDay(1) + expected = Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki') + assert result == expected + + result = result - CalendarDay(1) + assert result == ts + + # CalendarDay applied to a Timestamp that leads to ambiguous time + with pytest.raises(pytz.AmbiguousTimeError): + Timestamp("2018-11-03 01:00:00", tz='US/Pacific') + CalendarDay(1) + + # CalendarDay applied to a Timestamp that leads to non-existent time + with pytest.raises(pytz.NonExistentTimeError): + Timestamp("2019-03-09 02:00:00", tz='US/Pacific') + CalendarDay(1) + +@pytest.mark.parametrize('arg, exp', [ + [1, 2], + [-1, 0], + [-5, -4] +]) +def test_CalendarDay_arithmetic_with_self(arg, exp): + # GH 22274 + result = CalendarDay(1) + CalendarDay(arg) + expected = CalendarDay(exp) + assert result == expected + + +@pytest.mark.parametrize('arg', [timedelta(1), Day(1)]) +def test_CalendarDay_invalid_arithmetic(arg): + # GH 22274 + with pytest.raises(TypeError): + CalendarDay(1) + arg From 7edf6e6898b9881f069fb4a40d7d627c44eb3e06 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 10 Aug 2018 23:21:32 -0700 Subject: [PATCH 03/29] Add another test --- pandas/tests/tseries/offsets/test_offsets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index af5756b69e696..0e96ccc7b21b4 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -34,6 +34,7 @@ from pandas.io.pickle import read_pickle from pandas._libs.tslibs import timezones from pandas._libs.tslib import NaT, Timestamp +from pandas._libs.tslibs.timedeltas import Timedelta import pandas._libs.tslib as tslib import pandas.util.testing as tm from pandas.tseries.holiday import USFederalHolidayCalendar @@ -3210,7 +3211,7 @@ def test_CalendarDay_arithmetic_with_self(arg, exp): assert result == expected -@pytest.mark.parametrize('arg', [timedelta(1), Day(1)]) +@pytest.mark.parametrize('arg', [timedelta(1), Day(1), Timedelta(1)]) def test_CalendarDay_invalid_arithmetic(arg): # GH 22274 with pytest.raises(TypeError): From af599dab261d798648eff5e5740203d0c62f0359 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 10 Aug 2018 23:27:59 -0700 Subject: [PATCH 04/29] Add date_range example --- pandas/tests/indexes/datetimes/test_date_range.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 22fb8b2942bea..e2029c12da202 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -578,6 +578,14 @@ def test_mismatching_tz_raises_err(self, start, end): with pytest.raises(TypeError): pd.DatetimeIndex(start, end, freq=BDay()) + def test_CalendarDay_range_with_dst_crossing(self): + # GH 20596 + result = date_range('2018-10-23', '2018-11-06', freq='7CD', + tz='Europe/Paris') + expected = date_range('2018-10-23', '2018-11-06', + freq=pd.DateOffset(days=7), tz='Europe/Paris') + tm.assert_index_equal(result, expected) + class TestBusinessDateRange(object): @@ -772,7 +780,8 @@ def test_cdaterange_weekmask_and_holidays(self): holidays=['2013-05-01']) @pytest.mark.parametrize('freq', [freq for freq in prefix_mapping - if freq.startswith('C')]) + if freq.startswith('C') + and freq != 'CD']) def test_all_custom_freq(self, freq): # should not raise bdate_range(START, END, freq=freq, weekmask='Mon Wed Fri', From 59182955e2aecd2bd05b7f267a4338fac751afdf Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 11 Aug 2018 12:11:36 -0700 Subject: [PATCH 05/29] Add more tests --- .../indexes/datetimes/test_date_range.py | 2 +- .../timedeltas/test_timedelta_range.py | 4 +++ pandas/tests/tseries/offsets/test_offsets.py | 33 +++++++++++++++---- pandas/tseries/offsets.py | 4 +-- 4 files changed, 34 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index e2029c12da202..025298e30a9eb 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -781,7 +781,7 @@ def test_cdaterange_weekmask_and_holidays(self): @pytest.mark.parametrize('freq', [freq for freq in prefix_mapping if freq.startswith('C') - and freq != 'CD']) + and freq != 'CD']) # CalendarDay def test_all_custom_freq(self, freq): # should not raise bdate_range(START, END, freq=freq, weekmask='Mon Wed Fri', diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 87dff74cd04d7..1d10e63363cc8 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -48,6 +48,10 @@ def test_timedelta_range(self): result = df.loc['0s':, :] tm.assert_frame_equal(expected, result) + with pytest.raises(ValueError): + # GH 22274: CalendarDay is a relative time measurement + timedelta_range('1day', freq='CD', periods=2) + @pytest.mark.parametrize('periods, freq', [ (3, '2D'), (5, 'D'), (6, '19H12T'), (7, '16H'), (9, '12H')]) def test_linspace_behavior(self, periods, freq): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 0e96ccc7b21b4..cbbcb82c98462 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -11,12 +11,14 @@ from pandas.compat.numpy import np_datetime64_compat from pandas.core.series import Series +from pandas.core.frame import DataFrame from pandas._libs.tslibs import conversion from pandas._libs.tslibs.frequencies import (get_freq_code, get_freq_str, INVALID_FREQ_ERR_MSG) from pandas.tseries.frequencies import _offset_map, get_offset from pandas.core.indexes.datetimes import ( _to_m8, DatetimeIndex, _daterange_cache) +from pandas.core.indexes.timedeltas import TimedeltaIndex import pandas._libs.tslibs.offsets as liboffsets from pandas._libs.tslibs.offsets import CacheableOffset from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd, @@ -3180,16 +3182,28 @@ def test_last_week_of_month_on_offset(): fast = offset.onOffset(ts) assert fast == slow - -def test_CalendarDay_with_timezone(): +@pytest.mark.parametrize('box, assert_func', [ + [None, None], + [DatetimeIndex, 'assert_index_equal'], + [Series, 'assert_series_equal']]) +def test_CalendarDay_with_timezone(box, assert_func): # GH 22274 ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki') - result = ts + CalendarDay(1) expected = Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki') - assert result == expected + if box is not None: + ts = box(([ts])) + expected = box(([expected])) + result = ts + CalendarDay(1) + if assert_func: + getattr(tm, assert_func)(result, expected) + else: + assert result == expected result = result - CalendarDay(1) - assert result == ts + if assert_func: + getattr(tm, assert_func)(result, ts) + else: + assert result == ts # CalendarDay applied to a Timestamp that leads to ambiguous time with pytest.raises(pytz.AmbiguousTimeError): @@ -3199,6 +3213,7 @@ def test_CalendarDay_with_timezone(): with pytest.raises(pytz.NonExistentTimeError): Timestamp("2019-03-09 02:00:00", tz='US/Pacific') + CalendarDay(1) + @pytest.mark.parametrize('arg, exp', [ [1, 2], [-1, 0], @@ -3211,8 +3226,14 @@ def test_CalendarDay_arithmetic_with_self(arg, exp): assert result == expected -@pytest.mark.parametrize('arg', [timedelta(1), Day(1), Timedelta(1)]) +@pytest.mark.parametrize('arg', [ + timedelta(1), + Day(1), + Timedelta(1), + TimedeltaIndex([timedelta(1)])]) def test_CalendarDay_invalid_arithmetic(arg): # GH 22274 + # CalendarDay (relative time) cannot be added to Timedelta-like objects + # (absolute time) with pytest.raises(TypeError): CalendarDay(1) + arg diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 162220dd911f1..d5c32cdd16706 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2140,7 +2140,7 @@ def __init__(self, n=1, normalize=False): @apply_wraps def apply(self, other): """ - Apply the CalendarDay offset to a datetime object. Incoming datetime + Apply scalar arithmetic with CalendarDay offset. Incoming datetime objects can be tz-aware or naive. """ if type(other) == type(self): @@ -2160,7 +2160,7 @@ def apply(self, other): return as_timestamp(other) except TypeError: raise TypeError("Cannot perform arithmetic between {other} and " - "{offset}".format(other=type(other), offset=self)) + "CalendarDay".format(other=type(other))) @apply_index_wraps def apply_index(self, i): From a0609081b26aec8a1d5648c2b79a918c68353220 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 11 Aug 2018 13:00:18 -0700 Subject: [PATCH 06/29] Add whatsnew --- doc/source/timeseries.rst | 5 +- doc/source/whatsnew/v0.24.0.txt | 83 +++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index b7771436f8e55..a7c84f87f680d 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -369,7 +369,7 @@ In practice this becomes very cumbersome because we often need a very long index with a large number of timestamps. If we need timestamps on a regular frequency, we can use the :func:`date_range` and :func:`bdate_range` functions to create a ``DatetimeIndex``. The default frequency for ``date_range`` is a -**calendar day** while the default for ``bdate_range`` is a **business day**: +**day** while the default for ``bdate_range`` is a **business day**: .. ipython:: python @@ -1158,7 +1158,8 @@ frequencies. We will refer to these aliases as *offset aliases*. "B", "business day frequency" "C", "custom business day frequency" - "D", "calendar day frequency" + "D", "day frequency" + "CD", "calendar day frequency" "W", "weekly frequency" "M", "month end frequency" "SM", "semi-month end frequency (15th and end of month)" diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f26d3d76592d0..bc711bd6a6a11 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -158,6 +158,38 @@ array, but rather an ``ExtensionArray``: This is the same behavior as ``Series.values`` for categorical data. See :ref:`whatsnew_0240.api_breaking.interval_values` for more. +.. _whatsnew_0240.enhancements.calendarday: + +:class:`Day` and associated frequency alias ``'D'`` were documented to represent +a calendar day; however, arithmetic and operations with :class:`Day` consistently +respected absolute time (i.e. ```Day(n)`` and acted identically to ```Timedelta(days=n)``). + +:class:`CalendarDay` and associated frequency alias ``'CD'`` are now available +and respect calendar day arithmetic. (:issue:`22274`, :issue:`20596`, :issue:`16980`, :issue:`8774`) + +Addition with :class:`CalendarDay` across a daylight savings time transition: + +.. ipython:: python + + ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') + ts + pd.offsets.Day(1) + ts + pd.offsets.CalendarDay(1) + +However, if the resulting arithmetic results in a non-existent or ambiguous +time, and error will raise + +.. ipython:: python + + Timestamp("2018-11-03 01:00:00", tz='US/Pacific') + CalendarDay(1) + +The ``'CD'` frequency alias can be used with :func:`date_range` to create +a sequence of dates that are separate by a calendar day. + +.. ipython:: python + + ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') + pd.date_range(start=ts, freq='CD', periods=3) + pd.date_range(start=ts, freq='D', periods=3) .. _whatsnew_0240.enhancements.other: @@ -283,6 +315,57 @@ that the dates have been converted to UTC .. ipython:: python pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) +.. _whatsnew_0240.api_breaking.calendarday: + +:class:`Day` and associated frequency alias ``'D'`` were documented to represent +a calendar day; however, arithmetic and operations with :class:`Day` sometimes +respected absolute time (i.e. ```Day(n)`` and acted identically to ```Timedelta(days=n)``). + +*Previous Behavior*: + +.. code-block:: ipython + + + In [2]: ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') + + # Respects calendar arithmetic + In [3]: pd.date_range(start=ts, freq='D', periods=3) + Out[3]: + DatetimeIndex(['2016-10-30 00:00:00+03:00', '2016-10-31 00:00:00+02:00', + '2016-11-01 00:00:00+02:00'], + dtype='datetime64[ns, Europe/Helsinki]', freq='D') + + # Respects absolute arithmetic + In [4]: ts + pd.tseries.frequencies.to_offset('D') + Out[4]: Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki') + +:class:`CalendarDay` and associated frequency alias ``'CD'`` are now available +and respect calendar day arithmetic. (:issue:`22274`, :issue:`20596`, :issue:`16980`, :issue:`8774`) + +Addition with :class:`CalendarDay` across a daylight savings time transition: + +.. ipython:: python + + ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') + ts + pd.offsets.Day(1) + ts + pd.offsets.CalendarDay(1) + +However, if the resulting arithmetic results in a non-existent or ambiguous +time, and error will raise + +.. ipython:: python + + Timestamp("2018-11-03 01:00:00", tz='US/Pacific') + CalendarDay(1) + +The ``'CD'` frequency alias can be used with :func:`date_range` to create +a sequence of dates that are separate by a calendar day. + +.. ipython:: python + + ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') + pd.date_range(start=ts, freq='CD', periods=3) + pd.date_range(start=ts, freq='D', periods=3) + .. _whatsnew_0240.api_breaking.period_end_time: Time values in ``dt.end_time`` and ``to_timestamp(how='end')`` From 3232c26856c61747a6c77a77d9e055b05d9b7d15 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 11 Aug 2018 13:03:35 -0700 Subject: [PATCH 07/29] more doc fixups --- doc/source/whatsnew/v0.24.0.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index bc711bd6a6a11..3a050891ea9a5 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -340,7 +340,8 @@ respected absolute time (i.e. ```Day(n)`` and acted identically to ```Timedelta( Out[4]: Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki') :class:`CalendarDay` and associated frequency alias ``'CD'`` are now available -and respect calendar day arithmetic. (:issue:`22274`, :issue:`20596`, :issue:`16980`, :issue:`8774`) +and respect calendar day arithmetic while :class:`Day` and frequency alias ``'D'`` +will now respect absolute time (:issue:`22274`, :issue:`20596`, :issue:`16980`, :issue:`8774`) Addition with :class:`CalendarDay` across a daylight savings time transition: @@ -351,7 +352,7 @@ Addition with :class:`CalendarDay` across a daylight savings time transition: ts + pd.offsets.CalendarDay(1) However, if the resulting arithmetic results in a non-existent or ambiguous -time, and error will raise +time, an error will raise .. ipython:: python From 9605aa16433b9a0dbeb1f17d940244551b35d947 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 11 Aug 2018 14:07:51 -0700 Subject: [PATCH 08/29] cleanup --- pandas/core/indexes/datetimes.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 324f98052d2bf..629660c899a3f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -385,7 +385,10 @@ def _generate_range(cls, start, end, periods, name=None, freq=None, @classmethod def _use_cached_range(cls, freq, _normalized, start, end): - return _use_cached_range(freq, _normalized, start, end) + # Note: This always returns False + return (freq._should_cache() and + not (freq._normalize_cache and not _normalized) and + _naive_in_cache_range(start, end)) def _convert_for_op(self, value): """ Convert value to be insertable to ndarray """ @@ -1861,17 +1864,7 @@ def _naive_in_cache_range(start, end): else: if start.tzinfo is not None or end.tzinfo is not None: return False - return _in_range(start, end, _CACHE_START, _CACHE_END) - - -def _in_range(start, end, rng_start, rng_end): - return start > rng_start and end < rng_end - - -def _use_cached_range(freq, _normalized, start, end): - return (freq._should_cache() and - not (freq._normalize_cache and not _normalized) and - _naive_in_cache_range(start, end)) + return start > _CACHE_START and end < _CACHE_END def _time_to_micros(time): From 406259756cc00516daa228c5b681560c95d785f3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 11 Aug 2018 14:41:31 -0700 Subject: [PATCH 09/29] Modify and simplify logic of date_range --- pandas/core/arrays/datetimes.py | 103 +++++++++++++------------------- pandas/tseries/offsets.py | 13 +--- 2 files changed, 45 insertions(+), 71 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1dd34cdf73ab5..d188e00212c24 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -32,7 +32,7 @@ from pandas.core import ops from pandas.tseries.frequencies import to_offset -from pandas.tseries.offsets import Tick, Day, generate_range +from pandas.tseries.offsets import Tick, generate_range from pandas.core.arrays import datetimelike as dtl @@ -239,75 +239,58 @@ def _generate_range(cls, start, end, periods, freq, tz=None, start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize) - tz, inferred_tz = _infer_tz_from_endpoints(start, end, tz) + tz, _ = _infer_tz_from_endpoints(start, end, tz) - if hasattr(freq, 'delta') and freq != Day(): - # sub-Day Tick - if inferred_tz is None and tz is not None: - # naive dates - if start is not None and start.tz is None: - start = start.tz_localize(tz, ambiguous=False) + # If we have a Timedelta-like frequency (Tick) make sure tz + # is set before generating the range. For relative frequencies, + # generate the range with naive dates. + localize_args = {'tz': None} + if isinstance(freq, Tick): + localize_args = {'tz': tz, 'ambiguous': False} + if tz is not None: + # Localize the start and end arguments + if start is not None and start.tz is None: + start = start.tz_localize(**localize_args) - if end is not None and end.tz is None: - end = end.tz_localize(tz, ambiguous=False) + if end is not None and end.tz is None: + end = end.tz_localize(**localize_args) - if start and end: - if start.tz is None and end.tz is not None: - start = start.tz_localize(end.tz, ambiguous=False) + if start and end: + # Make sure start and end have the same tz + if start.tz is None and end.tz is not None: + start = start.tz_localize(**localize_args) - if end.tz is None and start.tz is not None: - end = end.tz_localize(start.tz, ambiguous=False) + if end.tz is None and start.tz is not None: + end = end.tz_localize(**localize_args) + if freq is not None: if cls._use_cached_range(freq, _normalized, start, end): + # Currently always False; never hit index = cls._cached_range(start, end, periods=periods, freq=freq) else: index = _generate_regular_range(cls, start, end, periods, freq) - else: + # TODO: Is this ever hit? + if tz is not None and getattr(index, 'tz', None) is None: + arr = conversion.tz_localize_to_utc( + ensure_int64(index.values), + tz, ambiguous=ambiguous) - if tz is not None: - # naive dates - if start is not None and start.tz is not None: - start = start.replace(tzinfo=None) - - if end is not None and end.tz is not None: - end = end.replace(tzinfo=None) - - if start and end: - if start.tz is None and end.tz is not None: - end = end.replace(tzinfo=None) - - if end.tz is None and start.tz is not None: - start = start.replace(tzinfo=None) - - if freq is not None: - if cls._use_cached_range(freq, _normalized, start, end): - index = cls._cached_range(start, end, periods=periods, - freq=freq) - else: - index = _generate_regular_range(cls, start, end, - periods, freq) - - if tz is not None and getattr(index, 'tz', None) is None: - arr = conversion.tz_localize_to_utc( - ensure_int64(index.values), - tz, ambiguous=ambiguous) - - index = cls(arr) - - # index is localized datetime64 array -> have to convert - # start/end as well to compare - if start is not None: - start = start.tz_localize(tz).asm8 - if end is not None: - end = end.tz_localize(tz).asm8 - else: - # Create a linearly spaced date_range in local time - start = start.tz_localize(tz) - end = end.tz_localize(tz) - arr = np.linspace(start.value, end.value, periods) - index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz) + index = cls(arr) + + # index is localized datetime64 array -> have to convert + # start/end as well to compare + if start is not None: + start = start.tz_localize(tz).asm8 + if end is not None: + end = end.tz_localize(tz).asm8 + else: + # Create a linearly spaced date_range in local time + start = start.tz_localize(tz) + end = end.tz_localize(tz) + arr = np.linspace(start.value, end.value, periods) + index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz) if not left_closed and len(index) and index[0] == start: index = index[1:] @@ -1255,12 +1238,10 @@ def _generate_regular_range(cls, start, end, periods, freq): data = cls._simple_new(data.view(_NS_DTYPE), None, tz=tz) else: tz = None + # Start and end should have the same timestamp by this point if isinstance(start, Timestamp): tz = start.tz - if isinstance(end, Timestamp): - tz = end.tz - xdr = generate_range(start=start, end=end, periods=periods, offset=freq) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index d5c32cdd16706..f4be0e8451ae7 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2348,7 +2348,7 @@ class Nano(Tick): def generate_range(start=None, end=None, periods=None, - offset=BDay(), time_rule=None): + offset=BDay()): """ Generates a sequence of dates corresponding to the specified time offset. Similar to dateutil.rrule except uses pandas DateOffset @@ -2358,9 +2358,8 @@ def generate_range(start=None, end=None, periods=None, ---------- start : datetime (default None) end : datetime (default None) - periods : int, optional - time_rule : (legacy) name of DateOffset object to be used, optional - Corresponds with names expected by tseries.frequencies.get_offset + periods : int, (default None) + offset : DateOffset, (default BDay()) Notes ----- @@ -2368,17 +2367,11 @@ def generate_range(start=None, end=None, periods=None, * At least two of (start, end, periods) must be specified. * If both start and end are specified, the returned dates will satisfy start <= date <= end. - * If both time_rule and offset are specified, time_rule supersedes offset. Returns ------- dates : generator object - """ - if time_rule is not None: - from pandas.tseries.frequencies import get_offset - - offset = get_offset(time_rule) start = to_datetime(start) end = to_datetime(end) From fea7a2b8d13dd21ded3d2d53dbc9c2bd5f3679ac Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 11 Aug 2018 16:45:00 -0700 Subject: [PATCH 10/29] adjust date_range tests for D --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/arrays/datetimes.py | 10 ++--- .../indexes/datetimes/test_date_range.py | 44 +++++++++---------- .../tests/indexes/datetimes/test_timezones.py | 36 ++++++++------- pandas/tseries/offsets.py | 8 +++- 5 files changed, 53 insertions(+), 47 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3a050891ea9a5..85910f5ddc763 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -319,7 +319,7 @@ that the dates have been converted to UTC :class:`Day` and associated frequency alias ``'D'`` were documented to represent a calendar day; however, arithmetic and operations with :class:`Day` sometimes -respected absolute time (i.e. ```Day(n)`` and acted identically to ```Timedelta(days=n)``). +respected absolute time instead (i.e. ```Day(n)`` and acted identically to ```Timedelta(days=n)``). *Previous Behavior*: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d188e00212c24..eaf00c0936526 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -241,11 +241,11 @@ def _generate_range(cls, start, end, periods, freq, tz=None, tz, _ = _infer_tz_from_endpoints(start, end, tz) - # If we have a Timedelta-like frequency (Tick) make sure tz - # is set before generating the range. For relative frequencies, - # generate the range with naive dates. + # Make sure start and end are timezone localized if: + # 1) freq = a Timedelta-like frequency (Tick) + # 2) freq = None i.e. generating a linspaced range localize_args = {'tz': None} - if isinstance(freq, Tick): + if isinstance(freq, Tick) or freq is None: localize_args = {'tz': tz, 'ambiguous': False} if tz is not None: # Localize the start and end arguments @@ -287,8 +287,6 @@ def _generate_range(cls, start, end, periods, freq, tz=None, end = end.tz_localize(tz).asm8 else: # Create a linearly spaced date_range in local time - start = start.tz_localize(tz) - end = end.tz_localize(tz) arr = np.linspace(start.value, end.value, periods) index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 025298e30a9eb..e0caf671fc390 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -336,28 +336,28 @@ def test_range_tz_pytz(self): assert dr[0] == start assert dr[2] == end - def test_range_tz_dst_straddle_pytz(self): - tz = timezone('US/Eastern') - dates = [(tz.localize(datetime(2014, 3, 6)), - tz.localize(datetime(2014, 3, 12))), - (tz.localize(datetime(2013, 11, 1)), - tz.localize(datetime(2013, 11, 6)))] - for (start, end) in dates: - dr = date_range(start, end, freq='D') - assert dr[0] == start - assert dr[-1] == end - assert np.all(dr.hour == 0) - - dr = date_range(start, end, freq='D', tz='US/Eastern') - assert dr[0] == start - assert dr[-1] == end - assert np.all(dr.hour == 0) - - dr = date_range(start.replace(tzinfo=None), end.replace( - tzinfo=None), freq='D', tz='US/Eastern') - assert dr[0] == start - assert dr[-1] == end - assert np.all(dr.hour == 0) + @pytest.mark.parametrize('start, end', [ + [Timestamp(datetime(2014, 3, 6), tz='US/Eastern'), + Timestamp(datetime(2014, 3, 12), tz='US/Eastern')], + [Timestamp(datetime(2013, 11, 1), tz='US/Eastern'), + Timestamp(datetime(2013, 11, 6), tz='US/Eastern')] + ]) + def test_range_tz_dst_straddle_pytz(self, start, end): + dr = date_range(start, end, freq='CD') + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + dr = date_range(start, end, freq='CD', tz='US/Eastern') + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + dr = date_range(start.replace(tzinfo=None), end.replace( + tzinfo=None), freq='CD', tz='US/Eastern') + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) def test_range_tz_dateutil(self): # see gh-2906 diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 95531b2d7a7ae..dc01f7ccbd496 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -429,24 +429,24 @@ def test_dti_tz_localize_utc_conversion(self, tz): with pytest.raises(pytz.NonExistentTimeError): rng.tz_localize(tz) - def test_dti_tz_localize_roundtrip(self, tz_aware_fixture): + @pytest.mark.parametrize('idx', [ + date_range(start='2014-01-01', end='2014-12-31', freq='M'), + date_range(start='2014-01-01', end='2014-12-31', freq='CD'), + date_range(start='2014-01-01', end='2014-03-01', freq='H'), + date_range(start='2014-08-01', end='2014-10-31', freq='T') + ]) + def test_dti_tz_localize_roundtrip(self, tz_aware_fixture, idx): tz = tz_aware_fixture + localized = idx.tz_localize(tz) + expected = date_range(start=idx[0], end=idx[-1], freq=idx.freq, + tz=tz) + tm.assert_index_equal(localized, expected) + with pytest.raises(TypeError): + localized.tz_localize(tz) - idx1 = date_range(start='2014-01-01', end='2014-12-31', freq='M') - idx2 = date_range(start='2014-01-01', end='2014-12-31', freq='D') - idx3 = date_range(start='2014-01-01', end='2014-03-01', freq='H') - idx4 = date_range(start='2014-08-01', end='2014-10-31', freq='T') - for idx in [idx1, idx2, idx3, idx4]: - localized = idx.tz_localize(tz) - expected = date_range(start=idx[0], end=idx[-1], freq=idx.freq, - tz=tz) - tm.assert_index_equal(localized, expected) - with pytest.raises(TypeError): - localized.tz_localize(tz) - - reset = localized.tz_localize(None) - tm.assert_index_equal(reset, idx) - assert reset.tzinfo is None + reset = localized.tz_localize(None) + tm.assert_index_equal(reset, idx) + assert reset.tzinfo is None def test_dti_tz_localize_naive(self): rng = date_range('1/1/2011', periods=100, freq='H') @@ -1033,7 +1033,9 @@ def test_date_range_span_dst_transition(self, tzstr): assert (dr.hour == 0).all() dr = date_range('2012-11-02', periods=10, tz=tzstr) - assert (dr.hour == 0).all() + result = dr.hour + expected = Index([0, 0, 0, 23, 23, 23, 23, 23, 23, 23]) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) def test_date_range_timezone_str_argument(self, tzstr): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index f4be0e8451ae7..6dee43d48e6bf 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2348,7 +2348,7 @@ class Nano(Tick): def generate_range(start=None, end=None, periods=None, - offset=BDay()): + offset=BDay(), time_rule=None): """ Generates a sequence of dates corresponding to the specified time offset. Similar to dateutil.rrule except uses pandas DateOffset @@ -2360,6 +2360,8 @@ def generate_range(start=None, end=None, periods=None, end : datetime (default None) periods : int, (default None) offset : DateOffset, (default BDay()) + time_rule : (legacy) name of DateOffset object to be used, optional + Corresponds with names expected by tseries.frequencies.get_offset Notes ----- @@ -2372,6 +2374,10 @@ def generate_range(start=None, end=None, periods=None, ------- dates : generator object """ + if time_rule is not None: + from pandas.tseries.frequencies import get_offset + + offset = get_offset(time_rule) start = to_datetime(start) end = to_datetime(end) From 3330c9b7c5b4b5cf3f98212114fd82652c01cf59 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 11 Aug 2018 16:53:13 -0700 Subject: [PATCH 11/29] Flake8 --- pandas/tests/tseries/offsets/test_offsets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index cbbcb82c98462..43c8c75e5f032 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -11,7 +11,6 @@ from pandas.compat.numpy import np_datetime64_compat from pandas.core.series import Series -from pandas.core.frame import DataFrame from pandas._libs.tslibs import conversion from pandas._libs.tslibs.frequencies import (get_freq_code, get_freq_str, INVALID_FREQ_ERR_MSG) @@ -3182,6 +3181,7 @@ def test_last_week_of_month_on_offset(): fast = offset.onOffset(ts) assert fast == slow + @pytest.mark.parametrize('box, assert_func', [ [None, None], [DatetimeIndex, 'assert_index_equal'], From c664b28791bfc36b47b4c98418d86d09a696537b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 11 Aug 2018 17:09:34 -0700 Subject: [PATCH 12/29] Adjust resample tests --- pandas/tests/test_resample.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index d11077668359b..b7b296a1fc882 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2031,7 +2031,7 @@ def test_resample_dst_anchor(self): # 5172 dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern') df = DataFrame([5], index=dti) - assert_frame_equal(df.resample(rule='D').sum(), + assert_frame_equal(df.resample(rule='CD').sum(), DataFrame([5], index=df.index.normalize())) df.resample(rule='MS').sum() assert_frame_equal( @@ -2085,14 +2085,14 @@ def test_resample_dst_anchor(self): df_daily = df['10/26/2013':'10/29/2013'] assert_frame_equal( - df_daily.resample("D").agg({"a": "min", "b": "max", "c": "count"}) + df_daily.resample("CD").agg({"a": "min", "b": "max", "c": "count"}) [["a", "b", "c"]], DataFrame({"a": [1248, 1296, 1346, 1394], "b": [1295, 1345, 1393, 1441], "c": [48, 50, 48, 48]}, index=date_range('10/26/2013', '10/29/2013', - freq='D', tz='Europe/Paris')), - 'D Frequency') + freq='CD', tz='Europe/Paris')), + 'CD Frequency') def test_downsample_across_dst(self): # GH 8531 From e2e059ace657cf96356236c06748e57b4d42e66d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 11 Aug 2018 17:12:03 -0700 Subject: [PATCH 13/29] remove redundant whatsnew section --- doc/source/whatsnew/v0.24.0.txt | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 85910f5ddc763..59a8aa4660ed1 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -158,38 +158,6 @@ array, but rather an ``ExtensionArray``: This is the same behavior as ``Series.values`` for categorical data. See :ref:`whatsnew_0240.api_breaking.interval_values` for more. -.. _whatsnew_0240.enhancements.calendarday: - -:class:`Day` and associated frequency alias ``'D'`` were documented to represent -a calendar day; however, arithmetic and operations with :class:`Day` consistently -respected absolute time (i.e. ```Day(n)`` and acted identically to ```Timedelta(days=n)``). - -:class:`CalendarDay` and associated frequency alias ``'CD'`` are now available -and respect calendar day arithmetic. (:issue:`22274`, :issue:`20596`, :issue:`16980`, :issue:`8774`) - -Addition with :class:`CalendarDay` across a daylight savings time transition: - -.. ipython:: python - - ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') - ts + pd.offsets.Day(1) - ts + pd.offsets.CalendarDay(1) - -However, if the resulting arithmetic results in a non-existent or ambiguous -time, and error will raise - -.. ipython:: python - - Timestamp("2018-11-03 01:00:00", tz='US/Pacific') + CalendarDay(1) - -The ``'CD'` frequency alias can be used with :func:`date_range` to create -a sequence of dates that are separate by a calendar day. - -.. ipython:: python - - ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') - pd.date_range(start=ts, freq='CD', periods=3) - pd.date_range(start=ts, freq='D', periods=3) .. _whatsnew_0240.enhancements.other: From 04b35af74ac87855bffe17b4f4ef8ac3edc44530 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 11 Aug 2018 17:16:11 -0700 Subject: [PATCH 14/29] some edits --- doc/source/whatsnew/v0.24.0.txt | 4 ++-- pandas/tseries/offsets.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 59a8aa4660ed1..6ca2231157163 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -287,7 +287,7 @@ that the dates have been converted to UTC :class:`Day` and associated frequency alias ``'D'`` were documented to represent a calendar day; however, arithmetic and operations with :class:`Day` sometimes -respected absolute time instead (i.e. ```Day(n)`` and acted identically to ```Timedelta(days=n)``). +respected absolute time instead (i.e. ``Day(n)`` and acted identically to ``Timedelta(days=n)``). *Previous Behavior*: @@ -320,7 +320,7 @@ Addition with :class:`CalendarDay` across a daylight savings time transition: ts + pd.offsets.CalendarDay(1) However, if the resulting arithmetic results in a non-existent or ambiguous -time, an error will raise +time, an error will raise. .. ipython:: python diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 6dee43d48e6bf..192b42456a65f 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2369,10 +2369,12 @@ def generate_range(start=None, end=None, periods=None, * At least two of (start, end, periods) must be specified. * If both start and end are specified, the returned dates will satisfy start <= date <= end. + * If both time_rule and offset are specified, time_rule supersedes offset. Returns ------- dates : generator object + """ if time_rule is not None: from pandas.tseries.frequencies import get_offset From af9117e0afa0241dee2fdc53a8524b076ca3e324 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 13 Aug 2018 15:37:55 -0700 Subject: [PATCH 15/29] Fix some bugs --- pandas/core/arrays/datetimes.py | 4 +++- pandas/tests/series/test_timezones.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index eaf00c0936526..cbfc305553158 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1236,9 +1236,11 @@ def _generate_regular_range(cls, start, end, periods, freq): data = cls._simple_new(data.view(_NS_DTYPE), None, tz=tz) else: tz = None - # Start and end should have the same timestamp by this point + # start and end should have the same timezone by this point if isinstance(start, Timestamp): tz = start.tz + elif isinstance(end, Timestamp): + tz = end.tz xdr = generate_range(start=start, end=end, periods=periods, offset=freq) diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index d59e7fd445f17..472b2c5644fa5 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -302,7 +302,7 @@ def test_getitem_pydatetime_tz(self, tzstr): def test_series_truncate_datetimeindex_tz(self): # GH 9243 - idx = date_range('4/1/2005', '4/30/2005', freq='D', tz='US/Pacific') + idx = date_range('4/1/2005', '4/30/2005', freq='CD', tz='US/Pacific') s = Series(range(len(idx)), index=idx) result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4)) expected = Series([1, 2, 3], index=idx[1:4]) From 307073cafbb4ee99477cc77a0bfa9ce42f1df322 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 14 Aug 2018 21:13:26 -0700 Subject: [PATCH 16/29] add sections in timeseries.rst --- doc/source/timeseries.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index a7c84f87f680d..985ae5f8ee1ff 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -868,6 +868,24 @@ normalized after the function is applied. hour.apply(pd.Timestamp('2014-01-01 23:00')) +Day vs. CalendarDay +~~~~~~~~~~~~~~~~~~~ + +:class:`Day` (``'D'``) is a timedelta-like offset that respects absolute time +arithmetic and essentially is an alias for 24 :class:`Hour`. This offset is the default +argument to many Pandas time related function like :func:`date_range` and :func:`timedelta_range`. + +:class:`CalendarDay` (``'CD'``) is a relativetimedelta-like offset that respects +calendar time arithmetic. :class:`CalendarDay` is useful preserving calendar day +semantics with date times with have day light savings transitions. + +.. ipython:: python + + ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') + ts + pd.offsets.Day(1) + ts + pd.offsets.CalendarDay(1) + + Parametric Offsets ~~~~~~~~~~~~~~~~~~ From 4698d052bf71190f227ba5dccea881d6e5c9cdac Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 15 Aug 2018 14:51:34 -0700 Subject: [PATCH 17/29] Deduplicate tests --- pandas/tests/indexes/datetimes/test_astype.py | 86 ++----------------- 1 file changed, 8 insertions(+), 78 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 64b8f48f6a4e1..d93803ad824a1 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -259,88 +259,18 @@ def test_to_period_microsecond(self): assert period[0] == Period('2007-01-01 10:11:12.123456Z', 'U') assert period[1] == Period('2007-01-01 10:11:13.789123Z', 'U') - def test_to_period_tz_pytz(self): - from pytz import utc as UTC - - xp = date_range('1/1/2000', '4/1/2000').to_period() - - ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=UTC) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - def test_to_period_tz_explicit_pytz(self): - xp = date_range('1/1/2000', '4/1/2000').to_period() - - ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - + @pytest.mark.parametrize('tz', [ + 'US/Eastern', pytz.utc, tzlocal(), 'dateutil/US/Eastern', + dateutil.tz.tzutc()]) + def test_to_period_tz(self, tz): + ts = date_range('1/1/2000', '4/1/2000', tz=tz) result = ts.to_period()[0] expected = ts[0].to_period() - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - def test_to_period_tz_dateutil(self): - xp = date_range('1/1/2000', '4/1/2000').to_period() - ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) + expected = date_range('1/1/2000', '4/1/2000').to_period() + result = ts.to_period() + tm.assert_index_equal(result, expected) def test_to_period_nofreq(self): idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) From dc15dd355445b4032771d53176d7ff5f779c3cdb Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 15 Aug 2018 17:11:58 -0700 Subject: [PATCH 18/29] Check what appveyor returns --- pandas/tests/indexes/datetimes/test_astype.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index d93803ad824a1..b6b39f1a15695 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -264,6 +264,8 @@ def test_to_period_microsecond(self): dateutil.tz.tzutc()]) def test_to_period_tz(self, tz): ts = date_range('1/1/2000', '4/1/2000', tz=tz) + if tz == tzlocal(): + raise ValueError(ts) result = ts.to_period()[0] expected = ts[0].to_period() assert result == expected From 846faa22616a42557f77ffbb300d70a2981d4186 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Thu, 16 Aug 2018 15:06:23 -0700 Subject: [PATCH 19/29] Adjust range since tzlocal has a different expected value due to dst --- pandas/tests/indexes/datetimes/test_astype.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index b6b39f1a15695..78b669de95598 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -263,14 +263,12 @@ def test_to_period_microsecond(self): 'US/Eastern', pytz.utc, tzlocal(), 'dateutil/US/Eastern', dateutil.tz.tzutc()]) def test_to_period_tz(self, tz): - ts = date_range('1/1/2000', '4/1/2000', tz=tz) - if tz == tzlocal(): - raise ValueError(ts) + ts = date_range('1/1/2000', '2/1/2000', tz=tz) result = ts.to_period()[0] expected = ts[0].to_period() assert result == expected - expected = date_range('1/1/2000', '4/1/2000').to_period() + expected = date_range('1/1/2000', '2/1/2000').to_period() result = ts.to_period() tm.assert_index_equal(result, expected) From cbfecc5a00b84b88d4b92c0663ed29736fb1a124 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 20 Aug 2018 15:15:44 -0700 Subject: [PATCH 20/29] Address review of tests and docs --- doc/source/timeseries.rst | 6 +- doc/source/whatsnew/v0.24.0.txt | 19 +--- pandas/core/arrays/datetimes.py | 3 +- pandas/tests/tseries/offsets/test_offsets.py | 113 ++++++++++--------- 4 files changed, 71 insertions(+), 70 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 985ae5f8ee1ff..8c2a32d27953c 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -868,14 +868,16 @@ normalized after the function is applied. hour.apply(pd.Timestamp('2014-01-01 23:00')) +.. _timeseries.dayvscalendarday: + Day vs. CalendarDay ~~~~~~~~~~~~~~~~~~~ :class:`Day` (``'D'``) is a timedelta-like offset that respects absolute time arithmetic and essentially is an alias for 24 :class:`Hour`. This offset is the default -argument to many Pandas time related function like :func:`date_range` and :func:`timedelta_range`. +argument to many pandas time related function like :func:`date_range` and :func:`timedelta_range`. -:class:`CalendarDay` (``'CD'``) is a relativetimedelta-like offset that respects +:class:`CalendarDay` (``'CD'``) is a relativedelta-like offset that respects calendar time arithmetic. :class:`CalendarDay` is useful preserving calendar day semantics with date times with have day light savings transitions. diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 5493296f5b9a2..c80e13ad8d1cb 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -285,6 +285,9 @@ that the dates have been converted to UTC .. _whatsnew_0240.api_breaking.calendarday: +CalendarDay Offset +^^^^^^^^^^^^^^^^^^ + :class:`Day` and associated frequency alias ``'D'`` were documented to represent a calendar day; however, arithmetic and operations with :class:`Day` sometimes respected absolute time instead (i.e. ``Day(n)`` and acted identically to ``Timedelta(days=n)``). @@ -319,22 +322,6 @@ Addition with :class:`CalendarDay` across a daylight savings time transition: ts + pd.offsets.Day(1) ts + pd.offsets.CalendarDay(1) -However, if the resulting arithmetic results in a non-existent or ambiguous -time, an error will raise. - -.. ipython:: python - - Timestamp("2018-11-03 01:00:00", tz='US/Pacific') + CalendarDay(1) - -The ``'CD'` frequency alias can be used with :func:`date_range` to create -a sequence of dates that are separate by a calendar day. - -.. ipython:: python - - ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') - pd.date_range(start=ts, freq='CD', periods=3) - pd.date_range(start=ts, freq='D', periods=3) - .. _whatsnew_0240.api_breaking.period_end_time: Time values in ``dt.end_time`` and ``to_timestamp(how='end')`` diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cbfc305553158..b3dee40a72a5b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -244,9 +244,10 @@ def _generate_range(cls, start, end, periods, freq, tz=None, # Make sure start and end are timezone localized if: # 1) freq = a Timedelta-like frequency (Tick) # 2) freq = None i.e. generating a linspaced range - localize_args = {'tz': None} if isinstance(freq, Tick) or freq is None: localize_args = {'tz': tz, 'ambiguous': False} + else: + localize_args = {'tz': None} if tz is not None: # Localize the start and end arguments if start is not None and start.tz is None: diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 43c8c75e5f032..f9f5fc2484bda 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -3182,58 +3182,69 @@ def test_last_week_of_month_on_offset(): assert fast == slow -@pytest.mark.parametrize('box, assert_func', [ - [None, None], - [DatetimeIndex, 'assert_index_equal'], - [Series, 'assert_series_equal']]) -def test_CalendarDay_with_timezone(box, assert_func): - # GH 22274 - ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki') - expected = Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki') - if box is not None: - ts = box(([ts])) - expected = box(([expected])) - result = ts + CalendarDay(1) - if assert_func: - getattr(tm, assert_func)(result, expected) - else: +class TestCalendarDay(object): + + def test_add_across_dst_scalar(self): + # GH 22274 + ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki') + expected = Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki') + result = ts + CalendarDay(1) assert result == expected - result = result - CalendarDay(1) - if assert_func: - getattr(tm, assert_func)(result, ts) - else: + result = result - CalendarDay(1) assert result == ts - # CalendarDay applied to a Timestamp that leads to ambiguous time - with pytest.raises(pytz.AmbiguousTimeError): - Timestamp("2018-11-03 01:00:00", tz='US/Pacific') + CalendarDay(1) - - # CalendarDay applied to a Timestamp that leads to non-existent time - with pytest.raises(pytz.NonExistentTimeError): - Timestamp("2019-03-09 02:00:00", tz='US/Pacific') + CalendarDay(1) - - -@pytest.mark.parametrize('arg, exp', [ - [1, 2], - [-1, 0], - [-5, -4] -]) -def test_CalendarDay_arithmetic_with_self(arg, exp): - # GH 22274 - result = CalendarDay(1) + CalendarDay(arg) - expected = CalendarDay(exp) - assert result == expected - - -@pytest.mark.parametrize('arg', [ - timedelta(1), - Day(1), - Timedelta(1), - TimedeltaIndex([timedelta(1)])]) -def test_CalendarDay_invalid_arithmetic(arg): - # GH 22274 - # CalendarDay (relative time) cannot be added to Timedelta-like objects - # (absolute time) - with pytest.raises(TypeError): - CalendarDay(1) + arg + @pytest.mark.parametrize('box', [DatetimeIndex, Series]) + def test_add_across_dst_array(self, box): + # GH 22274 + ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki') + expected = Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki') + arr = box([ts]) + expected = box([expected]) + result = arr + CalendarDay(1) + tm.assert_equal(result, expected) + + result = result - CalendarDay(1) + tm.assert_equal(arr, result) + + @pytest.mark.parametrize('arg', [ + Timestamp("2018-11-03 01:00:00", tz='US/Pacific'), + DatetimeIndex([Timestamp("2018-11-03 01:00:00", tz='US/Pacific')]) + ]) + def test_raises_AmbiguousTimeError(self, arg): + # GH 22274 + with pytest.raises(pytz.AmbiguousTimeError): + arg + CalendarDay(1) + + @pytest.mark.parametrize('arg', [ + Timestamp("2019-03-09 02:00:00", tz='US/Pacific'), + DatetimeIndex([Timestamp("2019-03-09 02:00:00", tz='US/Pacific')]) + ]) + def test_raises_NonExistentTimeError(self, arg): + # GH 22274 + with pytest.raises(pytz.NonExistentTimeError): + arg + CalendarDay(1) + + @pytest.mark.parametrize('arg, exp', [ + [1, 2], + [-1, 0], + [-5, -4] + ]) + def test_arithmetic(self, arg, exp): + # GH 22274 + result = CalendarDay(1) + CalendarDay(arg) + expected = CalendarDay(exp) + assert result == expected + + @pytest.mark.parametrize('arg', [ + timedelta(1), + Day(1), + Timedelta(1), + TimedeltaIndex([timedelta(1)]) + ]) + def test_invalid_arithmetic(self, arg): + # GH 22274 + # CalendarDay (relative time) cannot be added to Timedelta-like objects + # (absolute time) + with pytest.raises(TypeError): + CalendarDay(1) + arg From 585c2b936f30bf1e5f305bb4dc737c9d37651090 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 21 Aug 2018 17:03:23 -0700 Subject: [PATCH 21/29] Remove unused block? --- pandas/core/arrays/datetimes.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b3dee40a72a5b..35804c52725ba 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -271,21 +271,6 @@ def _generate_range(cls, start, end, periods, freq, tz=None, freq=freq) else: index = _generate_regular_range(cls, start, end, periods, freq) - - # TODO: Is this ever hit? - if tz is not None and getattr(index, 'tz', None) is None: - arr = conversion.tz_localize_to_utc( - ensure_int64(index.values), - tz, ambiguous=ambiguous) - - index = cls(arr) - - # index is localized datetime64 array -> have to convert - # start/end as well to compare - if start is not None: - start = start.tz_localize(tz).asm8 - if end is not None: - end = end.tz_localize(tz).asm8 else: # Create a linearly spaced date_range in local time arr = np.linspace(start.value, end.value, periods) From 33eea987a7f5c098a54eeba8a8ce651cefedea1b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 21 Aug 2018 23:15:40 -0700 Subject: [PATCH 22/29] Add back block --- pandas/core/arrays/datetimes.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 35804c52725ba..75c3d6c21b099 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -271,6 +271,20 @@ def _generate_range(cls, start, end, periods, freq, tz=None, freq=freq) else: index = _generate_regular_range(cls, start, end, periods, freq) + + if tz is not None and getattr(index, 'tz', None) is None: + arr = conversion.tz_localize_to_utc( + ensure_int64(index.values), + tz, ambiguous=ambiguous) + + index = cls(arr) + + # index is localized datetime64 array -> have to convert + # start/end as well to compare + if start is not None: + start = start.tz_localize(tz).asm8 + if end is not None: + end = end.tz_localize(tz).asm8 else: # Create a linearly spaced date_range in local time arr = np.linspace(start.value, end.value, periods) From 496f2e730ecca09c41ad894c1e3c7260c62cbfad Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 22 Aug 2018 11:04:32 -0700 Subject: [PATCH 23/29] Address review --- doc/source/timeseries.rst | 5 ++-- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/arrays/datetimes.py | 30 ++++++++++++++++++---- pandas/tests/tseries/offsets/test_ticks.py | 10 +++++++- 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 649e94b7f23c0..372f10396c8fb 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -892,12 +892,13 @@ Day vs. CalendarDay ~~~~~~~~~~~~~~~~~~~ :class:`Day` (``'D'``) is a timedelta-like offset that respects absolute time -arithmetic and essentially is an alias for 24 :class:`Hour`. This offset is the default +arithmetic and is an alias for 24 :class:`Hour`. This offset is the default argument to many pandas time related function like :func:`date_range` and :func:`timedelta_range`. :class:`CalendarDay` (``'CD'``) is a relativedelta-like offset that respects calendar time arithmetic. :class:`CalendarDay` is useful preserving calendar day -semantics with date times with have day light savings transitions. +semantics with date times with have day light savings transitions, i.e. :class:`CalendarDay` +will preserve the hour before the day light savings transition. .. ipython:: python diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 13df48ba0445e..50b01cab3cc64 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -315,6 +315,7 @@ respected absolute time instead (i.e. ``Day(n)`` and acted identically to ``Time :class:`CalendarDay` and associated frequency alias ``'CD'`` are now available and respect calendar day arithmetic while :class:`Day` and frequency alias ``'D'`` will now respect absolute time (:issue:`22274`, :issue:`20596`, :issue:`16980`, :issue:`8774`) +See the :ref:`documentation here ` for more information. Addition with :class:`CalendarDay` across a daylight savings time transition: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 75c3d6c21b099..dbdf46626b857 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -248,6 +248,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, localize_args = {'tz': tz, 'ambiguous': False} else: localize_args = {'tz': None} + if tz is not None: # Localize the start and end arguments if start is not None and start.tz is None: @@ -258,11 +259,9 @@ def _generate_range(cls, start, end, periods, freq, tz=None, if start and end: # Make sure start and end have the same tz - if start.tz is None and end.tz is not None: - start = start.tz_localize(**localize_args) - - if end.tz is None and start.tz is not None: - end = end.tz_localize(**localize_args) + start = _maybe_localize_point(start, start.tz, end.tz, + localize_args) + end = _maybe_localize_point(end, end.tz, start.tz, localize_args) if freq is not None: if cls._use_cached_range(freq, _normalized, start, end): @@ -1310,3 +1309,24 @@ def _maybe_normalize_endpoints(start, end, normalize): _normalized = _normalized and end.time() == _midnight return start, end, _normalized + + +def _maybe_localize_point(ts, is_none, is_not_none, localize_args): + """ + Localize a start or end Timestamp to the timezone of the corresponding + start or end Timestamp + + Parameters + ---------- + ts : start or end Timestamp to potentially localize + is_none : tz argument that should be None + is_not_none : tz argument that should not be None + localize_args : dict to pass to tz_localize + + Returns + ------- + ts : Timestamp + """ + if is_none is None and is_not_none is not None: + ts = ts.tz_localize(**localize_args) + return ts diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py index 24033d4ff6cbd..a55988001737d 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -9,7 +9,8 @@ from pandas import Timedelta, Timestamp from pandas.tseries import offsets -from pandas.tseries.offsets import Hour, Minute, Second, Milli, Micro, Nano +from pandas.tseries.offsets import (Day, Hour, Minute, Second, Milli, Micro, + Nano) from .common import assert_offset_equal @@ -171,6 +172,13 @@ def test_Nanosecond(): assert Micro(5) + Nano(1) == Nano(5001) +def test_Day_equals_24_Hours(): + ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki') + result = ts + Day(1) + expected = ts + Hour(24) + assert result == expected + + @pytest.mark.parametrize('kls, expected', [(Hour, Timedelta(hours=5)), (Minute, Timedelta(hours=2, minutes=3)), From 815811eb81372c3a2c78158783db9573e84b85e7 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 26 Aug 2018 11:34:09 -0700 Subject: [PATCH 24/29] Simplify linspace call --- pandas/core/arrays/datetimes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index dbdf46626b857..c04e46feb140b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -286,8 +286,8 @@ def _generate_range(cls, start, end, periods, freq, tz=None, end = end.tz_localize(tz).asm8 else: # Create a linearly spaced date_range in local time - arr = np.linspace(start.value, end.value, periods) - index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz) + arr = np.linspace(start.value, end.value, periods, dtype='M8[ns]') + index = cls._simple_new(arr, freq=None, tz=tz) if not left_closed and len(index) and index[0] == start: index = index[1:] From 637d92a7bb64b04b66b74945ef8563f2b4b73303 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 26 Aug 2018 18:37:04 -0700 Subject: [PATCH 25/29] address more comments --- pandas/core/arrays/datetimes.py | 1 + pandas/tseries/offsets.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c04e46feb140b..418fbccd21e22 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -266,6 +266,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, if freq is not None: if cls._use_cached_range(freq, _normalized, start, end): # Currently always False; never hit + # Should be reimplemented as apart of #17914 index = cls._cached_range(start, end, periods=periods, freq=freq) else: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 192b42456a65f..d4a8211c17b87 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2130,7 +2130,7 @@ class CalendarDay(SingleConstructorOffset): respects absolute time. """ _adjust_dst = True - _inc = timedelta(days=1) + _inc = Timedelta(days=1) _prefix = 'CD' _attributes = frozenset(['n', 'normalize']) From 4cd5c952c990c78f74f9b7b36b93a83a22ca0e7b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 29 Aug 2018 09:38:22 -0700 Subject: [PATCH 26/29] Undo linspace dtype setting for numpy compat --- pandas/core/arrays/datetimes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 418fbccd21e22..e8b8e4b0b42e4 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -287,8 +287,8 @@ def _generate_range(cls, start, end, periods, freq, tz=None, end = end.tz_localize(tz).asm8 else: # Create a linearly spaced date_range in local time - arr = np.linspace(start.value, end.value, periods, dtype='M8[ns]') - index = cls._simple_new(arr, freq=None, tz=tz) + arr = np.linspace(start.value, end.value, periods) + index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz) if not left_closed and len(index) and index[0] == start: index = index[1:] From 0441ff108671cd13ad02baa0b3ae25f2f48d31f4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 31 Aug 2018 10:33:57 -0700 Subject: [PATCH 27/29] Simplify localize --- pandas/core/arrays/datetimes.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e8b8e4b0b42e4..93c3a611222e7 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -251,18 +251,20 @@ def _generate_range(cls, start, end, periods, freq, tz=None, if tz is not None: # Localize the start and end arguments - if start is not None and start.tz is None: - start = start.tz_localize(**localize_args) - - if end is not None and end.tz is None: - end = end.tz_localize(**localize_args) - + start = _maybe_localize_point( + start, getattr(start, 'tz', None), start, localize_args + ) + end = _maybe_localize_point( + end, getattr(end, 'tz', None), end, localize_args + ) if start and end: # Make sure start and end have the same tz - start = _maybe_localize_point(start, start.tz, end.tz, - localize_args) - end = _maybe_localize_point(end, end.tz, start.tz, localize_args) - + start = _maybe_localize_point( + start, start.tz, end.tz, localize_args + ) + end = _maybe_localize_point( + end, end.tz, start.tz, localize_args + ) if freq is not None: if cls._use_cached_range(freq, _normalized, start, end): # Currently always False; never hit From 85929bfca7d010b4636e5344c01617c7b80ff10b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 31 Aug 2018 18:33:54 -0700 Subject: [PATCH 28/29] Clarify docstring --- pandas/core/arrays/datetimes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 93c3a611222e7..cd4f7399eeb40 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1322,8 +1322,8 @@ def _maybe_localize_point(ts, is_none, is_not_none, localize_args): Parameters ---------- ts : start or end Timestamp to potentially localize - is_none : tz argument that should be None - is_not_none : tz argument that should not be None + is_none : argument that should be None + is_not_none : argument that should not be None localize_args : dict to pass to tz_localize Returns From df076470773360bf825c224407a1aa4fbd52b41c Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 4 Sep 2018 18:43:54 -0700 Subject: [PATCH 29/29] move localize_args logic --- pandas/core/arrays/datetimes.py | 34 +++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index bc3c56051296d..466cfb296094c 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -241,34 +241,26 @@ def _generate_range(cls, start, end, periods, freq, tz=None, tz, _ = _infer_tz_from_endpoints(start, end, tz) - # Make sure start and end are timezone localized if: - # 1) freq = a Timedelta-like frequency (Tick) - # 2) freq = None i.e. generating a linspaced range - if isinstance(freq, Tick) or freq is None: - localize_args = {'tz': tz, 'ambiguous': False} - else: - localize_args = {'tz': None} - if tz is not None: # Localize the start and end arguments start = _maybe_localize_point( - start, getattr(start, 'tz', None), start, localize_args + start, getattr(start, 'tz', None), start, freq, tz ) end = _maybe_localize_point( - end, getattr(end, 'tz', None), end, localize_args + end, getattr(end, 'tz', None), end, freq, tz ) if start and end: # Make sure start and end have the same tz start = _maybe_localize_point( - start, start.tz, end.tz, localize_args + start, start.tz, end.tz, freq, tz ) end = _maybe_localize_point( - end, end.tz, start.tz, localize_args + end, end.tz, start.tz, freq, tz ) if freq is not None: if cls._use_cached_range(freq, _normalized, start, end): # Currently always False; never hit - # Should be reimplemented as apart of #17914 + # Should be reimplemented as apart of GH 17914 index = cls._cached_range(start, end, periods=periods, freq=freq) else: @@ -290,7 +282,9 @@ def _generate_range(cls, start, end, periods, freq, tz=None, else: # Create a linearly spaced date_range in local time arr = np.linspace(start.value, end.value, periods) - index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz) + index = cls._simple_new( + arr.astype('M8[ns]', copy=False), freq=None, tz=tz + ) if not left_closed and len(index) and index[0] == start: index = index[1:] @@ -1315,7 +1309,7 @@ def _maybe_normalize_endpoints(start, end, normalize): return start, end, _normalized -def _maybe_localize_point(ts, is_none, is_not_none, localize_args): +def _maybe_localize_point(ts, is_none, is_not_none, freq, tz): """ Localize a start or end Timestamp to the timezone of the corresponding start or end Timestamp @@ -1325,12 +1319,20 @@ def _maybe_localize_point(ts, is_none, is_not_none, localize_args): ts : start or end Timestamp to potentially localize is_none : argument that should be None is_not_none : argument that should not be None - localize_args : dict to pass to tz_localize + freq : Tick, DateOffset, or None + tz : str, timezone object or None Returns ------- ts : Timestamp """ + # Make sure start and end are timezone localized if: + # 1) freq = a Timedelta-like frequency (Tick) + # 2) freq = None i.e. generating a linspaced range + if isinstance(freq, Tick) or freq is None: + localize_args = {'tz': tz, 'ambiguous': False} + else: + localize_args = {'tz': None} if is_none is None and is_not_none is not None: ts = ts.tz_localize(**localize_args) return ts