Skip to content

Commit 49a3783

Browse files
committed
BUG: Fix Period and PeriodIndex support of combined alias offsets
Closes GH13730.
1 parent 7e15923 commit 49a3783

File tree

7 files changed: 268 additions (+268) and 40 deletions (-40)

doc/source/whatsnew/v0.19.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,8 @@ Deprecations
783783
- ``pd.tseries.util.pivot_annual`` is deprecated. Use ``pivot_table`` as an alternative; an example is :ref:`here <cookbook.pivot>` (:issue:`736`)
784784
- ``pd.tseries.util.isleapyear`` has been deprecated and will be removed in a subsequent release. Datetime-likes now have a ``.is_leap_year`` property. (:issue:`13727`)
785785
- ``Panel4D`` and ``PanelND`` constructors are deprecated and will be removed in a future version. The recommended way to represent these types of n-dimensional data is with the `xarray package <http://xarray.pydata.org/en/stable/>`__. Pandas provides a :meth:`~Panel4D.to_xarray` method to automate this conversion. (:issue:`13564`)
786+
- ``pandas.tseries.frequencies.get_standard_freq`` is deprecated. Use ``pandas.tseries.frequencies.to_offset(freq).rule_code`` instead. (:issue:`13874`)
787+
- ``pandas.tseries.frequencies.to_offset``'s ``freqstr`` keyword is deprecated in favor of ``freq``. (:issue:`13874`)
786788

787789
.. _whatsnew_0190.prior_deprecations:
788790

@@ -968,3 +970,4 @@ Bug Fixes
968970
- Bug in ``pd.read_csv`` in Python 2.x with non-UTF8 encoded, multi-character separated data (:issue:`3404`)
969971

970972
- Bug in ``Index`` where the ``KeyError`` raised displays an incorrect column when a column is not in the df and the columns contain duplicate values (:issue:`13822`)
973+
- Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)

pandas/src/period.pyx

+7-1
Original file line numberDiff line numberDiff line change
@@ -739,7 +739,7 @@ cdef class _Period(object):
739739
msg = 'Input cannot be converted to Period(freq={0})'
740740
raise IncompatibleFrequency(msg.format(self.freqstr))
741741
elif isinstance(other, offsets.DateOffset):
742-
freqstr = frequencies.get_standard_freq(other)
742+
freqstr = other.rule_code
743743
base = frequencies.get_base_alias(freqstr)
744744
if base == self.freq.rule_code:
745745
ordinal = self.ordinal + other.n
@@ -806,6 +806,7 @@ cdef class _Period(object):
806806
-------
807807
resampled : Period
808808
"""
809+
freq = self._maybe_convert_freq(freq)
809810
how = _validate_end_alias(how)
810811
base1, mult1 = frequencies.get_freq_code(self.freq)
811812
base2, mult2 = frequencies.get_freq_code(freq)
@@ -849,6 +850,8 @@ cdef class _Period(object):
849850
-------
850851
Timestamp
851852
"""
853+
if freq is not None:
854+
freq = self._maybe_convert_freq(freq)
852855
how = _validate_end_alias(how)
853856

854857
if freq is None:
@@ -1122,6 +1125,9 @@ class Period(_Period):
11221125

11231126
cdef _Period self
11241127

1128+
if freq is not None:
1129+
freq = cls._maybe_convert_freq(freq)
1130+
11251131
if ordinal is not None and value is not None:
11261132
raise ValueError(("Only value or ordinal but not both should be "
11271133
"given but not both"))

pandas/tseries/frequencies.py

+58-27
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import pandas.core.algorithms as algos
1616
from pandas.core.algorithms import unique
1717
from pandas.tseries.offsets import DateOffset
18-
from pandas.util.decorators import cache_readonly
18+
from pandas.util.decorators import cache_readonly, deprecate_kwarg
1919
import pandas.tseries.offsets as offsets
2020
import pandas.lib as lib
2121
import pandas.tslib as tslib
@@ -386,51 +386,85 @@ def get_period_alias(offset_str):
386386
_INVALID_FREQ_ERROR = "Invalid frequency: {0}"
387387

388388

389-
def to_offset(freqstr):
389+
@deprecate_kwarg(old_arg_name='freqstr', new_arg_name='freq')
390+
def to_offset(freq):
390391
"""
391-
Return DateOffset object from string representation or
392-
Timedelta object
392+
Return DateOffset object from string or tuple representation
393+
or datetime.timedelta object
394+
395+
Parameters
396+
----------
397+
freq : str, tuple, datetime.timedelta, DateOffset or None
398+
399+
Returns
400+
-------
401+
delta : DateOffset
402+
None if freq is None
403+
404+
Raises
405+
------
406+
ValueError
407+
If freq is an invalid frequency
408+
409+
See Also
410+
--------
411+
pandas.DateOffset
393412
394413
Examples
395414
--------
396-
>>> to_offset('5Min')
397-
Minute(5)
415+
>>> to_offset('5min')
416+
<5 * Minutes>
417+
418+
>>> to_offset('1D1H')
419+
<25 * Hours>
420+
421+
>>> to_offset(('W', 2))
422+
<2 * Weeks: weekday=6>
423+
424+
>>> to_offset((2, 'B'))
425+
<2 * BusinessDays>
426+
427+
>>> to_offset(datetime.timedelta(days=1))
428+
<Day>
429+
430+
>>> to_offset(Hour())
431+
<Hour>
398432
"""
399-
if freqstr is None:
433+
if freq is None:
400434
return None
401435

402-
if isinstance(freqstr, DateOffset):
403-
return freqstr
436+
if isinstance(freq, DateOffset):
437+
return freq
404438

405-
if isinstance(freqstr, tuple):
406-
name = freqstr[0]
407-
stride = freqstr[1]
439+
if isinstance(freq, tuple):
440+
name = freq[0]
441+
stride = freq[1]
408442
if isinstance(stride, compat.string_types):
409443
name, stride = stride, name
410444
name, _ = _base_and_stride(name)
411445
delta = get_offset(name) * stride
412446

413-
elif isinstance(freqstr, timedelta):
447+
elif isinstance(freq, timedelta):
414448
delta = None
415-
freqstr = Timedelta(freqstr)
449+
freq = Timedelta(freq)
416450
try:
417-
for name in freqstr.components._fields:
451+
for name in freq.components._fields:
418452
offset = _name_to_offset_map[name]
419-
stride = getattr(freqstr.components, name)
453+
stride = getattr(freq.components, name)
420454
if stride != 0:
421455
offset = stride * offset
422456
if delta is None:
423457
delta = offset
424458
else:
425459
delta = delta + offset
426460
except Exception:
427-
raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))
461+
raise ValueError(_INVALID_FREQ_ERROR.format(freq))
428462

429463
else:
430464
delta = None
431465
stride_sign = None
432466
try:
433-
for stride, name, _ in opattern.findall(freqstr):
467+
for stride, name, _ in opattern.findall(freq):
434468
offset = get_offset(name)
435469
if stride_sign is None:
436470
stride_sign = -1 if stride.startswith('-') else 1
@@ -443,10 +477,10 @@ def to_offset(freqstr):
443477
else:
444478
delta = delta + offset
445479
except Exception:
446-
raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))
480+
raise ValueError(_INVALID_FREQ_ERROR.format(freq))
447481

448482
if delta is None:
449-
raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))
483+
raise ValueError(_INVALID_FREQ_ERROR.format(freq))
450484

451485
return delta
452486

@@ -542,14 +576,11 @@ def get_standard_freq(freq):
542576
"""
543577
Return the standardized frequency string
544578
"""
545-
if freq is None:
546-
return None
547579

548-
if isinstance(freq, DateOffset):
549-
return freq.rule_code
550-
551-
code, stride = get_freq_code(freq)
552-
return _get_freq_str(code, stride)
580+
msg = ("get_standard_freq is deprecated. Use to_offset(freq).rule_code "
581+
"instead.")
582+
warnings.warn(msg, FutureWarning, stacklevel=2)
583+
return to_offset(freq).rule_code
553584

554585
# ---------------------------------------------------------------------
555586
# Period codes

pandas/tseries/period.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def dt64arr_to_periodarr(data, freq, tz):
5757
if data.dtype != np.dtype('M8[ns]'):
5858
raise ValueError('Wrong dtype: %s' % data.dtype)
5959

60+
freq = Period._maybe_convert_freq(freq)
6061
base, mult = _gfc(freq)
6162
return period.dt64arr_to_periodarr(data.view('i8'), base, tz)
6263

@@ -206,6 +207,9 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
206207

207208
@classmethod
208209
def _generate_range(cls, start, end, periods, freq, fields):
210+
if freq is not None:
211+
freq = Period._maybe_convert_freq(freq)
212+
209213
field_count = len(fields)
210214
if com._count_not_none(start, end) > 0:
211215
if field_count > 0:
@@ -222,6 +226,9 @@ def _generate_range(cls, start, end, periods, freq, fields):
222226

223227
@classmethod
224228
def _from_arraylike(cls, data, freq, tz):
229+
if freq is not None:
230+
freq = Period._maybe_convert_freq(freq)
231+
225232
if not isinstance(data, (np.ndarray, PeriodIndex,
226233
DatetimeIndex, Int64Index)):
227234
if is_scalar(data) or isinstance(data, Period):
@@ -478,7 +485,7 @@ def asfreq(self, freq=None, how='E'):
478485
"""
479486
how = _validate_end_alias(how)
480487

481-
freq = frequencies.get_standard_freq(freq)
488+
freq = Period._maybe_convert_freq(freq)
482489

483490
base1, mult1 = _gfc(self.freq)
484491
base2, mult2 = _gfc(freq)
@@ -579,6 +586,8 @@ def to_timestamp(self, freq=None, how='start'):
579586
if freq is None:
580587
base, mult = _gfc(self.freq)
581588
freq = frequencies.get_to_timestamp_base(base)
589+
else:
590+
freq = Period._maybe_convert_freq(freq)
582591

583592
base, mult = _gfc(freq)
584593
new_data = self.asfreq(freq, how)
@@ -596,7 +605,7 @@ def _maybe_convert_timedelta(self, other):
596605
if nanos % offset_nanos == 0:
597606
return nanos // offset_nanos
598607
elif isinstance(other, offsets.DateOffset):
599-
freqstr = frequencies.get_standard_freq(other)
608+
freqstr = other.rule_code
600609
base = frequencies.get_base_alias(freqstr)
601610
if base == self.freq.rule_code:
602611
return other.n

pandas/tseries/tests/test_offsets.py

+18-9
Original file line numberDiff line numberDiff line change
@@ -4591,21 +4591,30 @@ def test_parse_time_quarter_w_dash(self):
45914591

45924592

45934593
def test_get_standard_freq():
4594-
fstr = get_standard_freq('W')
4595-
assert fstr == get_standard_freq('w')
4596-
assert fstr == get_standard_freq('1w')
4597-
assert fstr == get_standard_freq(('W', 1))
4594+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
4595+
fstr = get_standard_freq('W')
4596+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
4597+
assert fstr == get_standard_freq('w')
4598+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
4599+
assert fstr == get_standard_freq('1w')
4600+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
4601+
assert fstr == get_standard_freq(('W', 1))
45984602

45994603
with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR):
4600-
get_standard_freq('WeEk')
4604+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
4605+
get_standard_freq('WeEk')
46014606

4602-
fstr = get_standard_freq('5Q')
4603-
assert fstr == get_standard_freq('5q')
4607+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
4608+
fstr = get_standard_freq('5Q')
4609+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
4610+
assert fstr == get_standard_freq('5q')
46044611

46054612
with tm.assertRaisesRegexp(ValueError, _INVALID_FREQ_ERROR):
4606-
get_standard_freq('5QuarTer')
4613+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
4614+
get_standard_freq('5QuarTer')
46074615

4608-
assert fstr == get_standard_freq(('q', 5))
4616+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
4617+
assert fstr == get_standard_freq(('q', 5))
46094618

46104619

46114620
def test_quarterly_dont_normalize():

0 commit comments

Comments
 (0)