From 967f39efd87500ac6a88b8f2433dda6e3cd77217 Mon Sep 17 00:00:00 2001 From: GFJ138 Date: Sun, 7 Feb 2021 16:14:19 +0100 Subject: [PATCH 1/5] fix #39556: - check that the deltas are unique before checking if they are day multiples - add test with freq="H" that reproduces the bug --- pandas/tests/tseries/frequencies/test_inference.py | 2 +- pandas/tseries/frequencies.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py index 95edd038dab9b..892be119093e5 100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -267,7 +267,7 @@ def test_infer_freq_tz(tz_naive_fixture, expected, dates): ], ) @pytest.mark.parametrize( - "freq", ["3H", "10T", "3601S", "3600001L", "3600000001U", "3600000000001N"] + "freq", ["H", "3H", "10T", "3601S", "3600001L", "3600000001U", "3600000000001N"] ) def test_infer_freq_tz_transition(tz_naive_fixture, date_pair, freq): # see gh-8772 diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 0d5598fcaf890..d07bf6dbbfc2c 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -239,17 +239,17 @@ def get_freq(self) -> Optional[str]: if not self.is_monotonic or not self.index._is_unique: return None - delta = self.deltas[0] - if _is_multiple(delta, _ONE_DAY): + if self.is_unique and _is_multiple(self.deltas[0], _ONE_DAY): return self._infer_daily_rule() # Business hourly, maybe. 17: one day / 65: one weekend if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]): return "BH" + # Possibly intraday frequency. Here we use the # original .asi8 values as the modified values # will not work around DST transitions. 
See #8772 - elif not self.is_unique_asi8: + if not self.is_unique_asi8: return None delta = self.deltas_asi8[0] @@ -414,7 +414,7 @@ def _infer_daily_rule(self): def _is_multiple(us, mult: int) -> bool: - return us % mult == 0 + return (us % mult == 0) def _maybe_add_count(base: str, count: float) -> str: From 551f3823d2ce0b4935465bf2a1cec7ad60a1cb51 Mon Sep 17 00:00:00 2001 From: GFJ138 Date: Sun, 7 Feb 2021 17:07:56 +0100 Subject: [PATCH 2/5] fix lint --- pandas/tseries/frequencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index d07bf6dbbfc2c..ad3c3b02bd07d 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -414,7 +414,7 @@ def _infer_daily_rule(self): def _is_multiple(us, mult: int) -> bool: - return (us % mult == 0) + return us % mult == 0 def _maybe_add_count(base: str, count: float) -> str: From 5a7bdc439dacaf5f7909d997aa2d7e696fe8391f Mon Sep 17 00:00:00 2001 From: GFJ138 Date: Sun, 7 Feb 2021 20:04:06 +0100 Subject: [PATCH 3/5] when freq=="B", the deltas are not unique (1 or 3 days) => use the minimum of the deltas as delta and check that it is non-zero --- pandas/tseries/frequencies.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index ad3c3b02bd07d..ba31031bb0415 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -239,7 +239,8 @@ def get_freq(self) -> Optional[str]: if not self.is_monotonic or not self.index._is_unique: return None - if self.is_unique and _is_multiple(self.deltas[0], _ONE_DAY): + delta = min(self.deltas) + if delta and _is_multiple(delta, _ONE_DAY): return self._infer_daily_rule() # Business hourly, maybe. 
17: one day / 65: one weekend From ce8e8d309ed84abe2f90ffcc005de93cf084a82e Mon Sep 17 00:00:00 2001 From: GFJ138 Date: Tue, 9 Feb 2021 06:22:57 +0100 Subject: [PATCH 4/5] add whatsnew entry --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 17d8c79994dbe..4dc8875047c7e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -284,6 +284,7 @@ Datetimelike - Bug in :meth:`Timestamp.round`, :meth:`Timestamp.floor`, :meth:`Timestamp.ceil` for values near the implementation bounds of :class:`Timestamp` (:issue:`39244`) - Bug in :meth:`Timedelta.round`, :meth:`Timedelta.floor`, :meth:`Timedelta.ceil` for values near the implementation bounds of :class:`Timedelta` (:issue:`38964`) - Bug in :func:`date_range` incorrectly creating :class:`DatetimeIndex` containing ``NaT`` instead of raising ``OutOfBoundsDatetime`` in corner cases (:issue:`24124`) +- Bug in :func:`infer_freq` incorrectly fails to infer 'H' frequency of :class:`DatetimeIndex` if the latter has a timezone and crosses DST boundaries (:issue:`39556`) Timedelta ^^^^^^^^^ From 17e0fb89c3d46863579cfbf6d5c74f717eb1bd7c Mon Sep 17 00:00:00 2001 From: GFJ138 Date: Tue, 9 Feb 2021 06:34:54 +0100 Subject: [PATCH 5/5] as self.deltas is already ordered, deltas[0] is the minimum delta --- pandas/tseries/frequencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index ba31031bb0415..2257ce47d8936 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -239,7 +239,7 @@ def get_freq(self) -> Optional[str]: if not self.is_monotonic or not self.index._is_unique: return None - delta = min(self.deltas) + delta = self.deltas[0] if delta and _is_multiple(delta, _ONE_DAY): return self._infer_daily_rule()