Skip to content

Commit e832ddf

Browse files
AdamGleave authored and jreback committed
BUG: fix infer frequency for business daily (#16683)
1 parent d8cd9ca commit e832ddf

File tree

6 files changed

+48
-14
lines changed

6 files changed

+48
-14
lines changed

asv_bench/benchmarks/timeseries.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,11 @@ def setup(self):
5353
self.rng6 = date_range(start='1/1/1', periods=self.N, freq='B')
5454

5555
self.rng7 = date_range(start='1/1/1700', freq='D', periods=100000)
56-
self.a = self.rng7[:50000].append(self.rng7[50002:])
56+
self.no_freq = self.rng7[:50000].append(self.rng7[50002:])
57+
self.d_freq = self.rng7[:50000].append(self.rng7[50000:])
58+
59+
self.rng8 = date_range(start='1/1/1700', freq='B', periods=100000)
60+
self.b_freq = self.rng8[:50000].append(self.rng8[50000:])
5761

5862
def time_add_timedelta(self):
5963
(self.rng + dt.timedelta(minutes=2))
@@ -94,8 +98,14 @@ def time_infer_dst(self):
9498
def time_timeseries_is_month_start(self):
    """Time the ``is_month_start`` attribute lookup on ``self.rng6``."""
    # rng6 is the business-day ('B') range built in setup(); the attribute
    # access alone is the operation being benchmarked, its value is discarded.
    business_days = self.rng6
    business_days.is_month_start
96100

97-
def time_infer_freq(self):
98-
infer_freq(self.a)
101+
def time_infer_freq_none(self):
    """Time ``infer_freq`` on ``self.no_freq``."""
    # no_freq is the daily range from setup() with two consecutive entries
    # left out, so the index contains a gap.
    gapped_index = self.no_freq
    infer_freq(gapped_index)
103+
104+
def time_infer_freq_daily(self):
    """Time ``infer_freq`` on ``self.d_freq``."""
    # d_freq is the daily ('D') range from setup(), re-assembled by appending
    # its two contiguous halves.
    daily_index = self.d_freq
    infer_freq(daily_index)
106+
107+
def time_infer_freq_business(self):
    """Time ``infer_freq`` on ``self.b_freq``."""
    # b_freq is the business-day ('B') range from setup(), re-assembled by
    # appending its two contiguous halves.
    business_index = self.b_freq
    infer_freq(business_index)
99109

100110

101111
class TimeDatetimeConverter(object):

doc/source/whatsnew/v0.20.3.txt

-1
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ Groupby/Resample/Rolling
7474
^^^^^^^^^^^^^^^^^^^^^^^^
7575

7676

77-
7877
Sparse
7978
^^^^^^
8079

doc/source/whatsnew/v0.21.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ Groupby/Resample/Rolling
169169
^^^^^^^^^^^^^^^^^^^^^^^^
170170
- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)
171171

172-
172+
- Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`)
173173

174174
Sparse
175175
^^^^^^

pandas/tests/indexes/timedeltas/test_timedelta.py

+14-6
Original file line numberDiff line numberDiff line change
@@ -564,15 +564,23 @@ def test_freq_conversion(self):
564564

565565

566566
class TestSlicing(object):
567+
@pytest.mark.parametrize('freq', ['B', 'D'])
568+
def test_timedelta(self, freq):
569+
index = date_range('1/1/2000', periods=50, freq=freq)
567570

568-
def test_timedelta(self):
569-
# this is valid too
570-
index = date_range('1/1/2000', periods=50, freq='B')
571571
shifted = index + timedelta(1)
572572
back = shifted + timedelta(-1)
573-
assert tm.equalContents(index, back)
574-
assert shifted.freq == index.freq
575-
assert shifted.freq == back.freq
573+
tm.assert_index_equal(index, back)
574+
575+
if freq == 'D':
576+
expected = pd.tseries.offsets.Day(1)
577+
assert index.freq == expected
578+
assert shifted.freq == expected
579+
assert back.freq == expected
580+
else: # freq == 'B'
581+
assert index.freq == pd.tseries.offsets.BusinessDay(1)
582+
assert shifted.freq is None
583+
assert back.freq == pd.tseries.offsets.BusinessDay(1)
576584

577585
result = index - timedelta(1)
578586
expected = index + timedelta(-1)

pandas/tests/tseries/test_frequencies.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -504,9 +504,14 @@ def test_raise_if_too_few(self):
504504
pytest.raises(ValueError, frequencies.infer_freq, index)
505505

506506
def test_business_daily(self):
507-
index = _dti(['12/31/1998', '1/3/1999', '1/4/1999'])
507+
index = _dti(['01/01/1999', '1/4/1999', '1/5/1999'])
508508
assert frequencies.infer_freq(index) == 'B'
509509

510+
def test_business_daily_look_alike(self):
    # GH 16624: the deltas here look like business-daily ones, but the
    # 'weekend' (2-day gap) sits in the wrong place, so 'B' must not be
    # inferred.
    dates = ['12/31/1998', '1/3/1999', '1/4/1999']
    inferred = frequencies.infer_freq(_dti(dates))
    assert inferred is None
514+
510515
def test_day(self):
511516
self._check_tick(timedelta(1), 'D')
512517

pandas/tseries/frequencies.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -975,8 +975,7 @@ def _infer_daily_rule(self):
975975
else:
976976
return _maybe_add_count('D', days)
977977

978-
# Business daily. Maybe
979-
if self.day_deltas == [1, 3]:
978+
if self._is_business_daily():
980979
return 'B'
981980

982981
wom_rule = self._get_wom_rule()
@@ -1012,6 +1011,19 @@ def _get_monthly_rule(self):
10121011
return {'cs': 'MS', 'bs': 'BMS',
10131012
'ce': 'M', 'be': 'BM'}.get(pos_check)
10141013

1014+
def _is_business_daily(self):
    """
    Check whether the index advances strictly by business days.

    A day-delta set of ``[1, 3]`` is necessary but not sufficient: the
    3-day steps must be the ones that land on a Monday and the 1-day
    steps must land on Tuesday through Friday.

    Returns
    -------
    bool
        True if every step of the index matches that pattern, else False.
    """
    # quick check: cannot be business daily
    if self.day_deltas != [1, 3]:
        return False

    # probably business daily, but need to confirm
    first_weekday = self.index[0].weekday()

    # Step sizes between consecutive entries, in whole days.
    # _ONE_DAY is presumably the number of asi8 units per day -- confirm.
    # NOTE(review): asi8 looks like the raw stored values; for a tz-aware
    # index crossing a DST change a step may not be a whole multiple of
    # _ONE_DAY -- confirm against how day_deltas is computed.
    shifts = np.diff(self.index.asi8)
    shifts = np.floor_divide(shifts, _ONE_DAY)

    # Weekday (0 = Monday) that each step arrives at.
    weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)

    monday_after_weekend = (weekdays == 0) & (shifts == 3)
    midweek_step = (weekdays > 0) & (weekdays <= 4) & (shifts == 1)

    # np.all yields numpy.bool_; convert so that both return paths hand
    # back a builtin bool.
    return bool(np.all(monday_after_weekend | midweek_step))
1026+
10151027
def _get_wom_rule(self):
10161028
# wdiffs = unique(np.diff(self.index.week))
10171029
# We also need -47, -49, -48 to catch index spanning year boundary

0 commit comments

Comments
 (0)