Skip to content

Commit b0651ca

Browse files
AdamGleave authored and TomAugspurger committed
BUG: fix infer frequency for business daily (pandas-dev#16683)
1 parent d8cd9ca commit b0651ca

File tree

6 files changed

+61
-67
lines changed

6 files changed

+61
-67
lines changed

asv_bench/benchmarks/timeseries.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,11 @@ def setup(self):
5353
self.rng6 = date_range(start='1/1/1', periods=self.N, freq='B')
5454

5555
self.rng7 = date_range(start='1/1/1700', freq='D', periods=100000)
56-
self.a = self.rng7[:50000].append(self.rng7[50002:])
56+
self.no_freq = self.rng7[:50000].append(self.rng7[50002:])
57+
self.d_freq = self.rng7[:50000].append(self.rng7[50000:])
58+
59+
self.rng8 = date_range(start='1/1/1700', freq='B', periods=100000)
60+
self.b_freq = self.rng8[:50000].append(self.rng8[50000:])
5761

5862
def time_add_timedelta(self):
5963
(self.rng + dt.timedelta(minutes=2))
@@ -94,8 +98,14 @@ def time_infer_dst(self):
9498
def time_timeseries_is_month_start(self):
9599
self.rng6.is_month_start
96100

97-
def time_infer_freq(self):
98-
infer_freq(self.a)
101+
def time_infer_freq_none(self):
102+
infer_freq(self.no_freq)
103+
104+
def time_infer_freq_daily(self):
105+
infer_freq(self.d_freq)
106+
107+
def time_infer_freq_business(self):
108+
infer_freq(self.b_freq)
99109

100110

101111
class TimeDatetimeConverter(object):

doc/source/whatsnew/v0.20.3.txt

+13-54
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,33 @@
11
.. _whatsnew_0203:
22

3-
v0.20.3 (June ??, 2017)
3+
v0.20.3 (July 7, 2017)
44
-----------------------
55

6-
This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes,
7-
bug fixes and performance improvements.
8-
We recommend that all users upgrade to this version.
6+
This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes
7+
and bug fixes. We recommend that all users upgrade to this version.
98

109
.. contents:: What's new in v0.20.3
1110
:local:
1211
:backlinks: none
1312

14-
15-
.. _whatsnew_0203.enhancements:
16-
17-
Enhancements
18-
~~~~~~~~~~~~
19-
20-
21-
22-
23-
24-
25-
.. _whatsnew_0203.performance:
26-
27-
Performance Improvements
28-
~~~~~~~~~~~~~~~~~~~~~~~~
29-
30-
31-
32-
33-
34-
3513
.. _whatsnew_0203.bug_fixes:
3614

3715
Bug Fixes
3816
~~~~~~~~~
39-
- Fixed issue with dataframe scatter plot for categorical data that reports incorrect column key not found when categorical data is used for plotting (:issue:`16199`)
40-
- Fixed issue with :meth:`DataFrame.style` where element id's were not unique (:issue:`16780`)
17+
18+
- Fixed an issue with scatter plot for categorical data that incorrectly raised a ``KeyError`` when categorical data is used for plotting (:issue:`16199`)
19+
- Fixed an issue with :meth:`DataFrame.style` where element ids were not unique (:issue:`16780`)
4120
- Fixed a pytest marker failing downstream packages' test suites (:issue:`16680`)
42-
- Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`)
21+
- Fixed loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`)
4322
- Fixed a bug where rolling computations on a column-MultiIndexed ``DataFrame`` failed (:issue:`16789`, :issue:`16825`)
44-
- Bug in a DataFrame/Series with a ``TimedeltaIndex`` when slice indexing (:issue:`16637`)
45-
23+
- Fixed a bug in slicing a DataFrame/Series that has a ``TimedeltaIndex`` (:issue:`16637`)
4624

4725
Conversion
4826
^^^^^^^^^^
4927

5028
- Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`)
5129
- Bug in ``Series`` construction when passing a ``Series`` with ``dtype='category'`` (:issue:`16524`).
52-
- Bug in ``DataFrame.astype()`` when passing a ``Series`` as the ``dtype`` kwarg. (:issue:`16717`).
30+
- Bug in :meth:`DataFrame.astype` when passing a ``Series`` as the ``dtype`` kwarg. (:issue:`16717`).
5331

5432
Indexing
5533
^^^^^^^^
@@ -62,40 +40,21 @@ I/O
6240

6341
- Bug in :func:`read_csv` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`)
6442
- Bug in :func:`read_hdf` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`)
65-
- Bug in :func:`DataFrame.to_latex` where ``bold_rows`` was wrongly specified to be ``True`` by default, whereas in reality row labels remained non-bold whatever parameter provided. (:issue:`16707`)
43+
- Bug in :meth:`DataFrame.to_latex` where ``bold_rows`` was wrongly specified to be ``True`` by default, whereas in reality row labels remained non-bold regardless of the value provided. (:issue:`16707`)
6644

6745
Plotting
6846
^^^^^^^^
69-
- Fix regression in series plotting that prevented RGB and RGBA tuples from being used as color arguments (:issue:`16233`)
70-
71-
72-
73-
Groupby/Resample/Rolling
74-
^^^^^^^^^^^^^^^^^^^^^^^^
75-
76-
77-
78-
Sparse
79-
^^^^^^
80-
81-
8247

48+
- Fixed regression that prevented RGB and RGBA tuples from being used as color arguments (:issue:`16233`)
8349

8450
Reshaping
8551
^^^^^^^^^
52+
8653
- ``PeriodIndex`` / ``TimedeltaIndex.join`` was missing the ``sort=`` kwarg (:issue:`16541`)
8754
- Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`).
8855
- Bug in :func:`merge` when merging/joining with multiple categorical columns (:issue:`16767`)
8956

90-
91-
Numeric
92-
^^^^^^^
93-
94-
9557
Categorical
9658
^^^^^^^^^^^
9759

98-
- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` with categorical data (:issue:`16793`)
99-
100-
Other
101-
^^^^^
60+
- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` parameter with categorical data (:issue:`16793`)

doc/source/whatsnew/v0.21.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ Groupby/Resample/Rolling
169169
^^^^^^^^^^^^^^^^^^^^^^^^
170170
- Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`)
171171

172-
172+
- Bug in ``infer_freq`` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`)
173173

174174
Sparse
175175
^^^^^^

pandas/tests/indexes/timedeltas/test_timedelta.py

+14-6
Original file line numberDiff line numberDiff line change
@@ -564,15 +564,23 @@ def test_freq_conversion(self):
564564

565565

566566
class TestSlicing(object):
567+
@pytest.mark.parametrize('freq', ['B', 'D'])
568+
def test_timedelta(self, freq):
569+
index = date_range('1/1/2000', periods=50, freq=freq)
567570

568-
def test_timedelta(self):
569-
# this is valid too
570-
index = date_range('1/1/2000', periods=50, freq='B')
571571
shifted = index + timedelta(1)
572572
back = shifted + timedelta(-1)
573-
assert tm.equalContents(index, back)
574-
assert shifted.freq == index.freq
575-
assert shifted.freq == back.freq
573+
tm.assert_index_equal(index, back)
574+
575+
if freq == 'D':
576+
expected = pd.tseries.offsets.Day(1)
577+
assert index.freq == expected
578+
assert shifted.freq == expected
579+
assert back.freq == expected
580+
else: # freq == 'B'
581+
assert index.freq == pd.tseries.offsets.BusinessDay(1)
582+
assert shifted.freq is None
583+
assert back.freq == pd.tseries.offsets.BusinessDay(1)
576584

577585
result = index - timedelta(1)
578586
expected = index + timedelta(-1)

pandas/tests/tseries/test_frequencies.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -504,9 +504,14 @@ def test_raise_if_too_few(self):
504504
pytest.raises(ValueError, frequencies.infer_freq, index)
505505

506506
def test_business_daily(self):
507-
index = _dti(['12/31/1998', '1/3/1999', '1/4/1999'])
507+
index = _dti(['01/01/1999', '1/4/1999', '1/5/1999'])
508508
assert frequencies.infer_freq(index) == 'B'
509509

510+
def test_business_daily_look_alike(self):
511+
# GH 16624, do not infer 'B' when 'weekend' (2-day gap) in wrong place
512+
index = _dti(['12/31/1998', '1/3/1999', '1/4/1999'])
513+
assert frequencies.infer_freq(index) is None
514+
510515
def test_day(self):
511516
self._check_tick(timedelta(1), 'D')
512517

pandas/tseries/frequencies.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -975,8 +975,7 @@ def _infer_daily_rule(self):
975975
else:
976976
return _maybe_add_count('D', days)
977977

978-
# Business daily. Maybe
979-
if self.day_deltas == [1, 3]:
978+
if self._is_business_daily():
980979
return 'B'
981980

982981
wom_rule = self._get_wom_rule()
@@ -1012,6 +1011,19 @@ def _get_monthly_rule(self):
10121011
return {'cs': 'MS', 'bs': 'BMS',
10131012
'ce': 'M', 'be': 'BM'}.get(pos_check)
10141013

1014+
def _is_business_daily(self):
1015+
# quick check: cannot be business daily
1016+
if self.day_deltas != [1, 3]:
1017+
return False
1018+
1019+
# probably business daily, but need to confirm
1020+
first_weekday = self.index[0].weekday()
1021+
shifts = np.diff(self.index.asi8)
1022+
shifts = np.floor_divide(shifts, _ONE_DAY)
1023+
weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)
1024+
return np.all(((weekdays == 0) & (shifts == 3)) |
1025+
((weekdays > 0) & (weekdays <= 4) & (shifts == 1)))
1026+
10151027
def _get_wom_rule(self):
10161028
# wdiffs = unique(np.diff(self.index.week))
10171029
# We also need -47, -49, -48 to catch index spanning year boundary

0 commit comments

Comments
 (0)