From 4069b8a300ef0dcd9cfafb21c7f6ead0ff8b6ddc Mon Sep 17 00:00:00 2001 From: Alex Rothberg Date: Sun, 6 Oct 2013 22:52:20 -0400 Subject: [PATCH 1/4] ENH: Extends the number of Periods supported by allowing for Python defined Periods. Periods now hold reference to Offset rather than just freqstr (GH5148). --- doc/source/release.rst | 9 + doc/source/v0.14.0.txt | 3 + pandas/core/series.py | 2 +- pandas/tseries/frequencies.py | 62 +++++- pandas/tseries/index.py | 11 +- pandas/tseries/offsets.py | 150 ++++++++++++- pandas/tseries/period.py | 173 ++++++++++----- pandas/tseries/resample.py | 7 +- pandas/tseries/tests/test_offsets.py | 60 ++++- pandas/tseries/tests/test_period.py | 320 ++++++++++++++++++++++++++- pandas/tseries/tools.py | 89 +++++--- 11 files changed, 763 insertions(+), 123 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 121cfb92b0eb2..bb4567f29fce2 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -312,6 +312,11 @@ Improvements to existing features in item handling (:issue:`6745`, :issue:`6988`). - Improve performance in certain reindexing operations by optimizing ``take_2d`` (:issue:`6749`) - Arrays of strings can be wrapped to a specified width (``str.wrap``) (:issue:`6999`) +- Constructor for ``Period`` now takes full set of possible ``Offset`` objects for ``freq`` + parameter. (:issue:`4878`) +- Extends the number of ``Period``\ s supported by allowing for Python defined ``Period``\ s (:issue:`5148`) +- Added ``inferred_freq_offset`` as property on ``DatetimeIndex`` to provide the actual + Offset object rather than the string representation (:issue:`5082`). .. 
_release.bug_fixes-0.14.0: @@ -458,7 +463,11 @@ Bug Fixes - Bug causing UnicodeEncodeError when get_dummies called with unicode values and a prefix (:issue:`6885`) - Bug in timeseries-with-frequency plot cursor display (:issue:`5453`) - Bug surfaced in groupby.plot when using a ``Float64Index`` (:issue:`7025`) +<<<<<<< HEAD - Stopped tests from failing if options data isn't able to be downloaded from Yahoo (:issue:`7034`) +======= +- Bug in not correctly treating 'QS', 'BQS', 'BQ' as frequency aliases (:issue:`5028`). +>>>>>>> ENH: Extends the number of Periods supported by allowing for Python defined Periods. Periods now hold reference to Offset rather than just freqstr (GH5148). pandas 0.13.1 ------------- diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index f19c1210b6a37..06bd4ee6e444c 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -554,6 +554,9 @@ Enhancements values='Quantity', aggfunc=np.sum) - str.wrap implemented (:issue:`6999`) +- Constructor for ``Period`` now takes full set of possible ``Offset`` objects for ``freq`` + parameter. (:issue:`4878`) +- Extends the number of ``Period``\ s supported by allowing for Python defined ``Period``\ s (:issue:`5148`) .. 
_whatsnew_0140.performance: diff --git a/pandas/core/series.py b/pandas/core/series.py index 74f038b2bad23..ce3a972b30ac0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2361,7 +2361,7 @@ def to_period(self, freq=None, copy=True): new_values = new_values.copy() if freq is None: - freq = self.index.freqstr or self.index.inferred_freq + freq = self.index.freq or self.index.inferred_freq_offset new_index = self.index.to_period(freq=freq) return self._constructor(new_values, index=new_index).__finalize__(self) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index e3c933e116987..be41e363e870c 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -71,7 +71,7 @@ def get_freq(freq): return freq -def get_freq_code(freqstr): +def get_freq_code(freqstr, as_periodstr=False): """ Parameters @@ -81,7 +81,13 @@ def get_freq_code(freqstr): ------- """ if isinstance(freqstr, DateOffset): - freqstr = (get_offset_name(freqstr), freqstr.n) + freqstr_raw = get_offset_name(freqstr) + + #if we can, convert to canonical period str + if as_periodstr: + freqstr_raw = get_period_alias(freqstr_raw) + + freqstr = (freqstr_raw, freqstr.n) if isinstance(freqstr, tuple): if (com.is_integer(freqstr[0]) and @@ -113,7 +119,7 @@ def _get_freq_str(base, mult=1): code = _reverse_period_code_map.get(base) if mult == 1: return code - return str(mult) + code + return "%s%s" % (mult, code) #---------------------------------------------------------------------- @@ -202,6 +208,9 @@ def get_period_alias(offset_str): 'Q@FEB': 'BQ-FEB', 'Q@MAR': 'BQ-MAR', 'Q': 'Q-DEC', + 'QS': 'QS-JAN', + 'BQ': 'BQ-DEC', + 'BQS': 'BQS-JAN', 'A': 'A-DEC', # YearEnd(month=12), 'AS': 'AS-JAN', # YearBegin(month=1), @@ -387,19 +396,44 @@ def get_legacy_offset_name(offset): name = offset.name return _legacy_reverse_map.get(name, name) -def get_standard_freq(freq): +def get_standard_freq(freq, as_periodstr=False): """ - Return the standardized frequency string 
+ Return the standardized frequency string. + as_periodstr=True returns the string representing the period rather than + the frequency. An example when these may differ is MonthBegin. + MonthBegin and MonthEnd are two different frequencies but they define the + same period. + + >>> get_standard_freq(pandas.tseries.offsets.MonthBegin(), as_periodstr=False) + 'L' + >>> get_standard_freq(pandas.tseries.offsets.MonthEnd(), as_periodstr=False) + 'M' + >>> get_standard_freq(pandas.tseries.offsets.MonthBegin(), as_periodstr=True) + 'M' + >>> get_standard_freq(pandas.tseries.offsets.MonthEnd(), as_periodstr=True) + 'M' """ if freq is None: return None - if isinstance(freq, DateOffset): - return get_offset_name(freq) + code, stride = get_freq_code(freq, as_periodstr=as_periodstr) - code, stride = get_freq_code(freq) return _get_freq_str(code, stride) +def _get_standard_period_freq_impl(freq): + return get_standard_freq(freq, as_periodstr=True) + +def get_standard_period_freq(freq): + if isinstance(freq, DateOffset): + return freq.periodstr + + return _get_standard_period_freq_impl(freq) + +def _assert_mult_1(mult): + if mult != 1: + # TODO: Better error message - this is slightly confusing + raise ValueError('Only mult == 1 supported') + #---------------------------------------------------------------------- # Period codes @@ -887,9 +921,11 @@ def is_subperiod(source, target): ------- is_subperiod : boolean """ + source_raw = source if isinstance(source, offsets.DateOffset): source = source.rule_code + target_raw = target if isinstance(target, offsets.DateOffset): target = target.rule_code @@ -918,6 +954,10 @@ def is_subperiod(source, target): return source in ['T', 'S'] elif target == 'S': return source in ['S'] + elif isinstance(source_raw, offsets._NonCythonPeriod): + return source_raw.is_subperiod(target_raw) + elif isinstance(target_raw, offsets._NonCythonPeriod): + return target_raw.is_superperiod(source_raw) def is_superperiod(source, target): @@ -936,9 +976,11 @@ 
def is_superperiod(source, target): ------- is_superperiod : boolean """ + source_raw = source if isinstance(source, offsets.DateOffset): source = source.rule_code + target_raw = target if isinstance(target, offsets.DateOffset): target = target.rule_code @@ -971,6 +1013,10 @@ def is_superperiod(source, target): return target in ['T', 'S'] elif source == 'S': return target in ['S'] + elif isinstance(source_raw, offsets._NonCythonPeriod): + return source_raw.is_superperiod(target_raw) + elif isinstance(target_raw, offsets._NonCythonPeriod): + return target_raw.is_subperiod(source_raw) def _get_rule_month(source, default='DEC'): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index a2e01c8110261..de25fbd8c3baa 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -792,8 +792,8 @@ def to_period(self, freq=None): msg = "You must pass a freq argument as current index has none." raise ValueError(msg) - if freq is None: - freq = get_period_alias(self.freqstr) + if freq is None: # No reason to convert to str; keep whatever freq is + freq = self.freq #get_period_alias(self.freqstr) return PeriodIndex(self.values, freq=freq, tz=self.tz) @@ -1427,6 +1427,13 @@ def inferred_freq(self): except ValueError: return None + @cache_readonly + def inferred_freq_offset(self): + if self.inferred_freq is not None: + return get_offset(self.inferred_freq) + else: + return None + @property def freqstr(self): """ return the frequency object as a string if its set, otherwise None """ diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 1b8b82235cf08..04ae0603a0b85 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -3,7 +3,7 @@ from pandas import compat import numpy as np -from pandas.tseries.tools import to_datetime +from pandas.tseries.tools import to_datetime, _try_parse_qtr_time_string # import after tools, dateutil check from dateutil.relativedelta import relativedelta, weekday @@ -60,6 +60,52 @@ class 
CacheableOffset(object): _cacheable = True +class _NonCythonPeriod(object): + """ + This class represents the base class for Offsets for which Period logic is + not implemented in Cython. This allows fully Python defined Offsets with + Period support + """ + + def get_start_dt(self, ordinal): + raise NotImplementedError("get_start_dt") + + def get_end_dt(self, ordinal): + raise NotImplementedError("get_end_dt") + + def period_format(self, ordinal, fmt=None): + raise NotImplementedError("period_format") + + def get_period_ordinal(self, dt): + raise NotImplementedError("get_period_ordinal") + + def dt64arr_to_periodarr(self, data, tz): + f = np.vectorize(lambda x: self.get_period_ordinal(Timestamp(x))) + return f(data.view('i8')) + + def period_asfreq_arr(self, values, freq, end): + from pandas.tseries.period import Period + f = np.vectorize(lambda x: + Period(value=self.period_asfreq_value(x, end), freq=freq).ordinal) + return f(values.view('i8')) + + def period_fromfreq_arr(self, values, freq_int_from, end): + from pandas.tseries.period import _change_period_freq + offset = 0 if end else 1 + f = np.vectorize(lambda x: + _change_period_freq(x, freq_int_from, self).ordinal - offset) + return f(values.view('i8')) + + def period_asfreq_value(self, ordinal, end): + return self.get_end_dt(ordinal) if end else self.get_start_dt(ordinal) + + def is_superperiod(self, target): + raise NotImplementedError("is_superperiod") + + def is_subperiod(self, target): + raise NotImplementedError("is_subperiod") + + class DateOffset(object): """ Standard kind of date increment used for a date range. @@ -295,6 +341,19 @@ def freqstr(self): return fstr + @property + def periodstr(self): + """ + The string representation for the Period defined by this offset. + This may differ from freqstr which defines a freq. For example Month vs. + start of Month. 
+ """ + from pandas.tseries.frequencies import _get_standard_period_freq_impl + return _get_standard_period_freq_impl(self) + + def parse_time_string(self, arg): + return None + class SingleConstructorOffset(DateOffset): @classmethod @@ -1654,14 +1713,14 @@ def get_rule_code_suffix(self): _int_to_weekday[self.weekday]) @classmethod - def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): - if varion_code == "N": + def _parse_suffix(cls, variation_code, startingMonth_code, weekday_code): + if variation_code == "N": variation = "nearest" - elif varion_code == "L": + elif variation_code == "L": variation = "last" else: raise ValueError( - "Unable to parse varion_code: %s" % (varion_code,)) + "Unable to parse variation_code: %s" % (variation_code,)) startingMonth = _month_to_int[startingMonth_code] weekday = _weekday_to_int[weekday_code] @@ -1677,7 +1736,7 @@ def _from_name(cls, *args): return cls(**cls._parse_suffix(*args)) -class FY5253Quarter(DateOffset): +class FY5253Quarter(_NonCythonPeriod, DateOffset): """ DateOffset increments between business quarter dates for 52-53 week fiscal year (also known as a 4-4-5 calendar). 
@@ -1828,6 +1887,85 @@ def rule_code(self): def _from_name(cls, *args): return cls(**dict(FY5253._parse_suffix(*args[:-1]), qtr_with_extra_week=int(args[-1]))) + + def _get_ordinal_from_y_q(self, fy, fq): + """Take zero indexed fq""" + return fy * 4 + fq + + def get_period_ordinal(self, dt): + year_end = self._offset.get_year_end(dt) + year_end_year = year_end.year + + if dt <= year_end: + if year_end.month < self._offset.startingMonth: + year_end_year -= 1 + fy = year_end_year + else: + fy = year_end_year + 1 + year_end = year_end + self._offset + + fq = 4 + while dt <= year_end: + year_end = year_end - self + fq -= 1 + + return self._get_ordinal_from_y_q(fy, fq) + + @property + def periodstr(self): + return self.rule_code + + def period_format(self, ordinal, fmt=None): + fy = ordinal // 4 + fq = (ordinal % 4) + 1 + + return "%dQ%d" % (fy, fq) + + def parse_time_string(self, arg): + qtr_parsed = _try_parse_qtr_time_string(arg) + if qtr_parsed is None: + return None + else: + fy, fq = qtr_parsed + return self.get_end_dt(self._get_ordinal_from_y_q(fy, fq - 1)) + + def get_start_dt(self, ordinal): + fy = ordinal // 4 + fq = (ordinal % 4) + 1 + + year_end = self._offset.get_year_end(datetime(fy, 1, 1)) + countdown = 4-fq+1 + while countdown: + countdown -= 1 + year_end = year_end-self + + return year_end + relativedelta(days=1) + + def get_end_dt(self, ordinal): + fy = ordinal // 4 + fq = (ordinal % 4) + 1 + + year_end = self._offset.get_year_end(datetime(fy, 1, 1)) + countdown = 4-fq + while countdown: + countdown -= 1 + year_end = year_end-self + + return year_end + + def is_superperiod(self, target): + if not isinstance(target, DateOffset): + from pandas.tseries.frequencies import get_offset + target = get_offset(target) + + if type(target) == Week: + return target.weekday == self._offset.weekday + elif type(target) == Day: + return True + + def is_subperiod(self, target): + #TODO Return True for FY5253 after FY5253 handles periods methods + return False class 
Easter(DateOffset): ''' diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 6d9e32433cd1e..c85ddab6fcacc 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -6,7 +6,8 @@ from pandas.core.base import PandasObject from pandas.tseries.frequencies import (get_freq_code as _gfc, - _month_numbers, FreqGroup) + _month_numbers, FreqGroup, + _assert_mult_1) from pandas.tseries.index import DatetimeIndex, Int64Index, Index from pandas.tseries.tools import parse_time_string import pandas.tseries.frequencies as _freq_mod @@ -20,6 +21,8 @@ import pandas.tslib as tslib import pandas.algos as _algos from pandas.compat import map, zip, u +from pandas.tseries.offsets import DateOffset, _NonCythonPeriod +from pandas.util.decorators import cache_readonly #--------------- @@ -27,7 +30,7 @@ def _period_field_accessor(name, alias): def f(self): - base, mult = _gfc(self.freq) + base, _ = _gfc(self.freq) return tslib.get_period_field(alias, self.ordinal, base) f.__name__ = name return property(f) @@ -35,11 +38,21 @@ def f(self): def _field_accessor(name, alias): def f(self): - base, mult = _gfc(self.freq) + base, _ = _gfc(self.freq) return tslib.get_period_field_arr(alias, self.values, base) f.__name__ = name return property(f) +def _check_freq_mult(freq): + if isinstance(freq, DateOffset): + mult = freq.n + else: + _, mult = _gfc(freq, as_periodstr=True) + + _assert_mult_1(mult) + +def _change_period_freq(ordinal_from, freq_int_from, freq_to): + return Period(Timestamp(tslib.period_ordinal_to_dt64(ordinal_from, freq=freq_int_from)), freq=freq_to) class Period(PandasObject): """ @@ -60,8 +73,8 @@ class Period(PandasObject): minute : int, default 0 second : int, default 0 """ - __slots__ = ['freq', 'ordinal'] - _comparables = ['name','freqstr'] + __slots__ = ['freq_obj', 'ordinal'] + _comparables = ['name', 'freqstr'] def __init__(self, value=None, freq=None, ordinal=None, year=None, month=1, quarter=None, day=1, @@ -70,8 +83,6 @@ def __init__(self, 
value=None, freq=None, ordinal=None, # periods such as A, Q, etc. Every five minutes would be, e.g., # ('T', 5) but may be passed in as a string like '5T' - self.freq = None - # ordinal is the period offset from the gregorian proleptic epoch self.ordinal = None @@ -94,9 +105,11 @@ def __init__(self, value=None, freq=None, ordinal=None, elif isinstance(value, Period): other = value - if freq is None or _gfc(freq) == _gfc(other.freq): + if freq is None \ + or freq == other.freq_obj \ + or _gfc(freq, as_periodstr=True) == _gfc(other.freq_obj, as_periodstr=True): self.ordinal = other.ordinal - freq = other.freq + freq = other.freq_obj else: converted = other.asfreq(freq) self.ordinal = converted.ordinal @@ -119,17 +132,22 @@ def __init__(self, value=None, freq=None, ordinal=None, msg = "Value must be Period, string, integer, or datetime" raise ValueError(msg) - base, mult = _gfc(freq) - if mult != 1: - # TODO: Better error message - this is slightly confusing - raise ValueError('Only mult == 1 supported') + self.freq_obj = freq + _check_freq_mult(freq) if self.ordinal is None: - self.ordinal = tslib.period_ordinal(dt.year, dt.month, dt.day, - dt.hour, dt.minute, dt.second, dt.microsecond, 0, - base) + if isinstance(freq, _NonCythonPeriod): + self.ordinal = freq.get_period_ordinal(dt) + else: + base, _ = _gfc(freq, as_periodstr=True) + + self.ordinal = tslib.period_ordinal(dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, dt.microsecond, 0, + base) - self.freq = _freq_mod._get_freq_str(base) + @cache_readonly + def freq(self): + return _freq_mod.get_standard_period_freq(self.freq_obj) def __eq__(self, other): if isinstance(other, Period): @@ -197,16 +215,23 @@ def asfreq(self, freq, how='E'): resampled : Period """ how = _validate_end_alias(how) - base1, mult1 = _gfc(self.freq) - base2, mult2 = _gfc(freq) + _check_freq_mult(freq) + end = how == 'E' - if mult2 != 1: - raise ValueError('Only mult == 1 supported') + if isinstance(self.freq_obj, _NonCythonPeriod): 
+ value = self.freq_obj.period_asfreq_value(self.ordinal, end) + return Period(value=value, freq=freq) + elif isinstance(freq, _NonCythonPeriod): + freq_int, _ = _gfc(self.freq) + return _change_period_freq(ordinal_from=self.ordinal, freq_int_from=freq_int, freq_to=freq) + else: - end = how == 'E' - new_ordinal = tslib.period_asfreq(self.ordinal, base1, base2, end) + base1, _ = _gfc(self.freq) + base2, _ = _gfc(freq) + + new_ordinal = tslib.period_asfreq(self.ordinal, base1, base2, end) - return Period(ordinal=new_ordinal, freq=base2) + return Period(ordinal=new_ordinal, freq=base2) @property def start_time(self): @@ -264,12 +289,18 @@ def to_timestamp(self, freq=None, how='start', tz=None): @classmethod def now(cls, freq=None): return Period(datetime.now(), freq=freq) - - def __repr__(self): + + def __get_formatted(self, fmt=None): + if isinstance(self.freq_obj, _NonCythonPeriod): + return self.freq_obj.period_format(self.ordinal, fmt=fmt) + base, mult = _gfc(self.freq) - formatted = tslib.period_format(self.ordinal, base) - freqstr = _freq_mod._reverse_period_code_map[base] + return tslib.period_format(self.ordinal, base, fmt=fmt) + def __repr__(self): + freqstr = _freq_mod.get_standard_period_freq(self.freq_obj) + formatted = self.__get_formatted() + if not compat.PY3: encoding = com.get_option("display.encoding") formatted = formatted.encode(encoding) @@ -283,9 +314,9 @@ def __unicode__(self): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. """ - base, mult = _gfc(self.freq) - formatted = tslib.period_format(self.ordinal, base) - value = ("%s" % formatted) + + formatted = self.__get_formatted() + value = str(formatted) return value def strftime(self, fmt): @@ -425,8 +456,7 @@ def strftime(self, fmt): >>> a.strftime('%b. %d, %Y was a %A') 'Jan. 
01, 2001 was a Monday' """ - base, mult = _gfc(self.freq) - return tslib.period_format(self.ordinal, base, fmt) + return self.__get_formatted(fmt) def _get_date_and_freq(value, freq): @@ -467,15 +497,23 @@ def _get_ordinals(data, freq): return lib.map_infer(data, f) +def dt64arr_to_periodarr_impl(data, freq, tz): + base, _ = _gfc(freq, as_periodstr=True) + return tslib.dt64arr_to_periodarr(data.view('i8'), base, tz) + + def dt64arr_to_periodarr(data, freq, tz): if data.dtype != np.dtype('M8[ns]'): raise ValueError('Wrong dtype: %s' % data.dtype) - base, mult = _gfc(freq) - return tslib.dt64arr_to_periodarr(data.view('i8'), base, tz) + if isinstance(freq, _NonCythonPeriod): + return freq.dt64arr_to_periodarr(data, tz) + + return dt64arr_to_periodarr_impl(data, freq, tz) # --- Period index sketch + def _period_index_cmp(opname): """ Wrap comparison operations to convert datetime-like to datetime64 @@ -523,7 +561,7 @@ class PeriodIndex(Int64Index): dtype : NumPy dtype (default: i8) copy : bool Make a copy of input ndarray - freq : string or period object, optional + freq : string or DateOffset object, optional One of pandas period strings or corresponding objects start : starting value, period-like, optional If data is None, used as the start point in generating regular @@ -565,7 +603,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, quarter=None, day=None, hour=None, minute=None, second=None, tz=None): - freq = _freq_mod.get_standard_freq(freq) + freq_obj = freq if periods is not None: if com.is_float(periods): @@ -580,17 +618,23 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, else: fields = [year, month, quarter, day, hour, minute, second] data, freq = cls._generate_range(start, end, periods, - freq, fields) + freq_obj, fields) else: - ordinal, freq = cls._from_arraylike(data, freq, tz) + ordinal, freq = cls._from_arraylike(data, freq_obj, tz) data = np.array(ordinal, dtype=np.int64, copy=False) subarr = 
data.view(cls) subarr.name = name - subarr.freq = freq + + # If freq_obj was initially none, fall back to freq + subarr.freq_obj = freq_obj if freq_obj is not None else freq return subarr + @cache_readonly + def freq(self): + return _freq_mod.get_standard_period_freq(self.freq_obj) + @classmethod def _generate_range(cls, start, end, periods, freq, fields): field_count = com._count_not_none(*fields) @@ -681,7 +725,8 @@ def __contains__(self, key): return key.ordinal in self._engine def _box_values(self, values): - f = lambda x: Period(ordinal=x, freq=self.freq) + freq = self.freq + f = lambda x: Period(ordinal=x, freq=freq) return lib.map_infer(values, f) def asof_locs(self, where, mask): @@ -721,7 +766,7 @@ def astype(self, dtype): def __iter__(self): for val in self.values: - yield Period(ordinal=val, freq=self.freq) + yield Period(ordinal=val, freq=self.freq_obj) @property def is_all_dates(self): @@ -754,21 +799,31 @@ def freqstr(self): def asfreq(self, freq=None, how='E'): how = _validate_end_alias(how) + _check_freq_mult(freq) - freq = _freq_mod.get_standard_freq(freq) + freq_obj = freq - base1, mult1 = _gfc(self.freq) - base2, mult2 = _gfc(freq) + end = how == 'E' - if mult2 != 1: - raise ValueError('Only mult == 1 supported') + if isinstance(self.freq_obj, _NonCythonPeriod): + new_data = self.freq_obj.period_asfreq_arr( + self.values, freq_obj, end) + freq = _freq_mod.get_standard_freq(freq) + elif isinstance(freq_obj, _NonCythonPeriod): + freq = freq_obj.periodstr + freq_int_from, _ = _gfc(self.freq) + new_data = freq_obj.period_fromfreq_arr( + self.values, freq_int_from, end) + else: + freq = _freq_mod.get_standard_freq(freq) + base1, _ = _gfc(self.freq) + base2, _ = _gfc(freq) - end = how == 'E' - new_data = tslib.period_asfreq_arr(self.values, base1, base2, end) + new_data = tslib.period_asfreq_arr(self.values, base1, base2, end) result = new_data.view(PeriodIndex) result.name = self.name - result.freq = freq + result.freq_obj = freq_obj return result def 
to_datetime(self, dayfirst=False): @@ -892,7 +947,7 @@ def get_value(self, series, key): return _maybe_box(self, super(PeriodIndex, self).get_value(s, key), series, key) except (KeyError, IndexError): try: - asdt, parsed, reso = parse_time_string(key, self.freq) + asdt, parsed, reso = parse_time_string(key, self.freq_obj) grp = _freq_mod._infer_period_group(reso) freqn = _freq_mod._period_group(self.freq) @@ -918,7 +973,7 @@ def get_value(self, series, key): except KeyError: pass - key = Period(key, self.freq).ordinal + key = Period(key, self.freq_obj).ordinal return _maybe_box(self, self._engine.get_value(s, key), series, key) def get_loc(self, key): @@ -1033,7 +1088,7 @@ def _wrap_union_result(self, other, result): def _apply_meta(self, rawarr): if not isinstance(rawarr, PeriodIndex): rawarr = rawarr.view(PeriodIndex) - rawarr.freq = self.freq + rawarr.freq_obj = self.freq_obj return rawarr def __getitem__(self, key): @@ -1041,7 +1096,7 @@ def __getitem__(self, key): arr_idx = self.view(np.ndarray) if np.isscalar(key): val = arr_idx[key] - return Period(ordinal=val, freq=self.freq) + return Period(ordinal=val, freq=self.freq_obj) else: if com._is_bool_indexer(key): key = np.asarray(key) @@ -1052,9 +1107,9 @@ def __getitem__(self, key): # values = np.asarray(list(values), dtype=object) # return values.reshape(result.shape) - return PeriodIndex(result, name=self.name, freq=self.freq) + return PeriodIndex(result, name=self.name, freq=self.freq_obj) - return PeriodIndex(result, name=self.name, freq=self.freq) + return PeriodIndex(result, name=self.name, freq=self.freq_obj) def _format_with_header(self, header, **kwargs): return header + self._format_native_types(**kwargs) @@ -1073,7 +1128,7 @@ def __array_finalize__(self, obj): if not self.ndim: # pragma: no cover return self.item() - self.freq = getattr(obj, 'freq', None) + self.freq_obj = getattr(obj, 'freq_obj', None) self.name = getattr(obj, 'name', None) self._reset_identity() @@ -1116,7 +1171,7 @@ def 
take(self, indices, axis=None): indices = com._ensure_platform_int(indices) taken = self.values.take(indices, axis=axis) taken = taken.view(PeriodIndex) - taken.freq = self.freq + taken.freq_obj = self.freq_obj taken.name = self.name return taken @@ -1173,7 +1228,7 @@ def __setstate__(self, state): np.ndarray.__setstate__(self, nd_state) self.name = own_state[0] try: # backcompat - self.freq = own_state[1] + self.freq_obj = own_state[1] except: pass else: # pragma: no cover diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index dd72a5245e7b2..5cf0a8a98093e 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -295,6 +295,7 @@ def _resample_timestamps(self): def _resample_periods(self): # assumes set_grouper(obj) already called axlabels = self.ax + source_freq = axlabels.freq_obj obj = self.obj if len(axlabels) == 0: @@ -309,7 +310,7 @@ def _resample_periods(self): # Start vs. end of period memb = axlabels.asfreq(self.freq, how=self.convention) - if is_subperiod(axlabels.freq, self.freq) or self.how is not None: + if is_subperiod(source_freq, self.freq) or self.how is not None: # Downsampling rng = np.arange(memb.values[0], memb.values[-1] + 1) bins = memb.searchsorted(rng, side='right') @@ -317,7 +318,7 @@ def _resample_periods(self): grouped = obj.groupby(grouper, axis=self.axis) return grouped.aggregate(self._agg_method) - elif is_superperiod(axlabels.freq, self.freq): + elif is_superperiod(source_freq, self.freq): # Get the fill indexer indexer = memb.get_indexer(new_index, method=self.fill_method, limit=self.limit) @@ -325,7 +326,7 @@ def _resample_periods(self): else: raise ValueError('Frequency %s cannot be resampled to %s' - % (axlabels.freq, self.freq)) + % (source_freq, self.freq)) def _take_new_index(obj, indexer, new_index, axis=0): diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index 86635271eb9c1..c01bd4fcae8df 100644 --- a/pandas/tseries/tests/test_offsets.py +++ 
b/pandas/tseries/tests/test_offsets.py @@ -17,9 +17,9 @@ WeekOfMonth, format, ole2datetime, QuarterEnd, to_datetime, normalize_date, get_offset, get_offset_name, get_standard_freq) -from pandas.tseries.frequencies import _offset_map +from pandas.tseries.frequencies import _offset_map, cday from pandas.tseries.index import _to_m8, DatetimeIndex, _daterange_cache -from pandas.tseries.tools import parse_time_string +from pandas.tseries.tools import parse_time_string, DateParseError import pandas.tseries.offsets as offsets from pandas.tslib import monthrange, OutOfBoundsDatetime, NaT @@ -1650,6 +1650,7 @@ def test_onOffset(self): offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, variation="nearest") + tests = [ # From Wikipedia (see: http://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar#Saturday_nearest_the_end_of_month) # 2006-09-02 2006 September 2 @@ -1700,6 +1701,7 @@ def test_onOffset(self): (offset_n, datetime(2012, 12, 31), False), (offset_n, datetime(2013, 1, 1), True), (offset_n, datetime(2013, 1, 2), False), + ] for offset, date, expected in tests: @@ -1716,6 +1718,7 @@ def test_apply(self): datetime(2011, 1, 2), datetime(2012, 1, 1), datetime(2012, 12, 30)] + DEC_SAT = FY5253(n=-1, startingMonth=12, weekday=5, variation="nearest") tests = [ @@ -1932,6 +1935,7 @@ def test_onOffset(self): (offset_n, datetime(2012, 12, 31), False), (offset_n, datetime(2013, 1, 1), True), (offset_n, datetime(2013, 1, 2), False) + ] for offset, date, expected in tests: @@ -2626,6 +2630,7 @@ def test_get_offset_name(self): self.assertEqual(get_offset_name(makeFY5253LastOfMonthQuarter(weekday=1, startingMonth=3, qtr_with_extra_week=4)),"REQ-L-MAR-TUE-4") self.assertEqual(get_offset_name(makeFY5253NearestEndMonthQuarter(weekday=1, startingMonth=3, qtr_with_extra_week=3)), "REQ-N-MAR-TUE-3") + def test_get_offset(): assertRaisesRegexp(ValueError, "rule.*GIBBERISH", get_offset, 'gibberish') assertRaisesRegexp(ValueError, "rule.*QS-JAN-B", get_offset, 'QS-JAN-B') @@ 
-2648,12 +2653,18 @@ def test_get_offset(): (name, expected, offset)) -def test_parse_time_string(): - (date, parsed, reso) = parse_time_string('4Q1984') - (date_lower, parsed_lower, reso_lower) = parse_time_string('4q1984') - assert date == date_lower - assert parsed == parsed_lower - assert reso == reso_lower +class TestParseTimeString(tm.TestCase): + def test_case_sensitivity(self): + (date, parsed, reso) = parse_time_string('4Q1984') + (date_lower, parsed_lower, reso_lower) = parse_time_string('4q1984') + + self.assertEqual(date, date_lower) + self.assertEqual(parsed, parsed_lower) + self.assertEqual(reso, reso_lower) + + def test_invalid_string(self): + self.assertRaises(DateParseError, + parse_time_string, '2013Q1', freq="INVLD-L-DEC-SAT") def test_get_standard_freq(): @@ -2714,6 +2725,37 @@ def test_rule_code(self): self.assertEqual(alias, get_offset(alias).rule_code) self.assertEqual(alias, (get_offset(alias) * 5).rule_code) + #GH5028 + def test_offset_map(self): + for name, offset in compat.iteritems(_offset_map): + if name == 'C' and cday is None: + continue + self.assertEqual(name, None if offset is None else offset.rule_code) + + #GH5028 + def test_many_to_one_mapping(self): + offsets = [ + QuarterBegin(startingMonth=1), + BQuarterBegin(startingMonth=1), + BQuarterEnd(startingMonth=12), + ] + + for offset in offsets: + self.assertEqual(get_offset_name(offset), offset.rule_code) + + def test_aliased_offset_equality(self): + self.assertEqual(get_offset("Q"), get_offset("Q")) + self.assertEqual(get_offset("Q"), get_offset("Q-DEC")) + self.assertEqual(get_offset("QS"), get_offset("QS-JAN")) + self.assertEqual(get_offset("BQ"), get_offset("BQ-DEC")) + self.assertEqual(get_offset("BQS"), get_offset("BQS-JAN")) + + def test_aliased_offset_repr_equality(self): + self.assertEqual(repr(get_offset("Q")), repr(get_offset("Q"))) + self.assertEqual(repr(get_offset("Q")), repr(get_offset("Q-DEC"))) + self.assertEqual(repr(get_offset("QS")), repr(get_offset("QS-JAN"))) 
+ self.assertEqual(repr(get_offset("BQ")), repr(get_offset("BQ-DEC"))) + self.assertEqual(repr(get_offset("BQS")), repr(get_offset("BQS-JAN"))) def test_apply_ticks(): result = offsets.Hour(3).apply(offsets.Hour(4)) @@ -2814,7 +2856,7 @@ def test_str_for_named_is_name(self): names += ['WOM-' + week + day for week in ('1', '2', '3', '4') for day in days] #singletons - names += ['S', 'T', 'U', 'BM', 'BMS', 'BQ', 'QS'] # No 'Q' + names += ['S', 'T', 'U', 'BM', 'BMS', ] # No 'Q', 'BQ', 'QS', 'BQS', _offset_map.clear() for name in names: offset = get_offset(name) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index a6326794c1b12..2fec84f7c0cd1 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -14,7 +14,7 @@ from pandas.tseries.frequencies import MONTHS, DAYS, _period_code_map from pandas.tseries.period import Period, PeriodIndex, period_range from pandas.tseries.index import DatetimeIndex, date_range, Index -from pandas.tseries.tools import to_datetime +from pandas.tseries.tools import to_datetime, _try_parse_qtr_time_string import pandas.tseries.period as pmod import pandas.core.datetools as datetools @@ -29,6 +29,7 @@ import pandas.util.testing as tm from pandas import compat from numpy.testing import assert_array_equal +from pandas.tseries.offsets import FY5253Quarter, WeekDay, Week, Day class TestPeriodProperties(tm.TestCase): @@ -2169,12 +2170,45 @@ def test_pickle_freq(self): import pickle prng = period_range('1/1/2011', '1/1/2012', freq='M') new_prng = pickle.loads(pickle.dumps(prng)) - self.assertEqual(new_prng.freq,'M') + self.assertEqual(new_prng.freq, 'M') def test_slice_keep_name(self): idx = period_range('20010101', periods=10, freq='D', name='bob') self.assertEqual(idx.name, idx[1:].name) + def test_period_range_alias(self): + self.assertTrue( + pd.date_range('1/1/2012', periods=4, + freq=pd.offsets.MonthEnd()).to_period().identical( + pd.period_range('1/1/2012', periods=4, 
+ freq=pd.offsets.MonthEnd()))) + + # GH 4878 + self.assertTrue( + pd.date_range('1/1/2012', periods=4, + freq=pd.offsets.BusinessMonthEnd()).to_period().identical( + pd.period_range('1/1/2012', periods=4, + freq=pd.offsets.BusinessMonthEnd()))) + + def test_period_range_alias2(self): + self.assertTrue( + pd.Series(range(4), + index=pd.date_range('1/1/2012', periods=4, + freq=pd.offsets.MonthEnd())).to_period().index.identical( + pd.Series(range(4), + index=pd.date_range('1/1/2012', periods=4, + freq=pd.offsets.MonthEnd()).to_period()).index)) + + # GH 4878 + self.assertTrue( + pd.Series(range(4), + index=pd.date_range('1/1/2012', periods=4, + freq=pd.offsets.BusinessMonthEnd()) + ).to_period().index.identical( + pd.Series(range(4), + index=pd.date_range('1/1/2012', periods=4, + freq=pd.offsets.BusinessMonthEnd()).to_period()).index)) + def _permute(obj): return obj.take(np.random.permutation(len(obj))) @@ -2313,6 +2347,288 @@ def test_sort(self): self.assertEqual(sorted(periods), correctPeriods) +class TestFY5253QuarterPeriods(tm.TestCase): + def test_get_period_ordinal(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + self.assertEqual(offset.get_period_ordinal( + datetime(2013, 10, 27)), 2013 * 4 + 3) + self.assertEqual(offset.get_period_ordinal( + datetime(2013, 12, 28)), 2013 * 4 + 3) + self.assertEqual(offset.get_period_ordinal( + datetime(2013, 12, 29)), 2014 * 4 + 0) + + offset_n = FY5253Quarter(weekday=WeekDay.TUE, startingMonth=12, + variation="nearest", qtr_with_extra_week=4) + + self.assertEqual(offset_n.get_period_ordinal(datetime(2013, 1, 2)), + offset_n.get_period_ordinal(datetime(2013, 1, 30))) + + self.assertEqual(offset_n.get_period_ordinal(datetime(2013, 1, 1)) + 1, + offset_n.get_period_ordinal(datetime(2013, 1, 2))) + + def test_period_format(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + 
self.assertEqual(offset.period_format(2013 * 4 + 3), "2013Q4") + self.assertEqual(offset.period_format(2014 * 4 + 0), "2014Q1") + + def test_get_end_dt(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + self.assertEqual(offset.get_end_dt( + offset.get_period_ordinal(datetime(2013, 12, 27))), + datetime(2013, 12, 28)) + self.assertEqual(offset.get_end_dt( + offset.get_period_ordinal(datetime(2013, 12, 28))), + datetime(2013, 12, 28)) + self.assertEqual(offset.get_end_dt( + offset.get_period_ordinal(datetime(2013, 12, 28))), + datetime(2013, 12, 28)) + self.assertEqual(offset.get_end_dt( + offset.get_period_ordinal(datetime(2013, 12, 29))), + datetime(2014, 3, 29)) + + def test_get_start_dt(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + self.assertEqual(offset.get_start_dt( + offset.get_period_ordinal(datetime(2013, 9, 29))), + datetime(2013, 9, 29)) + self.assertEqual(offset.get_start_dt( + offset.get_period_ordinal(datetime(2013, 12, 27))), + datetime(2013, 9, 29)) + self.assertEqual(offset.get_start_dt( + offset.get_period_ordinal(datetime(2013, 12, 28))), + datetime(2013, 9, 29)) + self.assertEqual(offset.get_start_dt( + offset.get_period_ordinal(datetime(2013, 12, 28))), + datetime(2013, 9, 29)) + self.assertEqual(offset.get_start_dt( + offset.get_period_ordinal(datetime(2013, 12, 29))), + datetime(2013, 12, 29)) + + def test_period_str(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + self.assertEqual(str(Period("2013-12-27", freq=offset)), "2013Q4") + self.assertEqual(str(Period("2013-12-28", freq=offset)), "2013Q4") + self.assertEqual(str(Period("2013-12-29", freq=offset)), "2014Q1") + self.assertEqual(str(Period("2013-9-29", freq=offset)), "2013Q4") + self.assertEqual(str(Period("2013-9-28", freq=offset)), "2013Q3") + + offset_n = 
FY5253Quarter(weekday=WeekDay.TUE, startingMonth=12, + variation="nearest", qtr_with_extra_week=4) + self.assertEqual(str(Period("2013-01-01", freq=offset_n)), "2012Q4") + self.assertEqual(str(Period("2013-01-03", freq=offset_n)), "2013Q1") + self.assertEqual(str(Period("2013-01-02", freq=offset_n)), "2013Q1") + + offset_sun = FY5253Quarter(weekday=WeekDay.SUN, startingMonth=12, + variation="nearest", qtr_with_extra_week=4) + self.assertEqual(str(Period("2011-1-2", freq=offset_sun)), "2010Q4") + self.assertEqual(str(Period("2011-1-3", freq=offset_sun)), "2011Q1") + self.assertEqual(str(Period("2011-4-3", freq=offset_sun)), "2011Q1") + self.assertEqual(str(Period("2011-4-4", freq=offset_sun)), "2011Q2") + self.assertEqual(str(Period("2011-7-3", freq=offset_sun)), "2011Q2") + self.assertEqual(str(Period("2011-7-4", freq=offset_sun)), "2011Q3") + self.assertEqual(str(Period("2003-9-28", freq=offset_sun)), "2003Q3") + self.assertEqual(str(Period("2003-9-29", freq=offset_sun)), "2003Q4") + self.assertEqual(str(Period("2004-9-26", freq=offset_sun)), "2004Q3") + self.assertEqual(str(Period("2004-9-27", freq=offset_sun)), "2004Q4") + self.assertEqual(str(Period("2005-1-2", freq=offset_sun)), "2004Q4") + self.assertEqual(str(Period("2005-1-3", freq=offset_sun)), "2005Q1") + + def test_period_str_parsing(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + self.assertEqual(_try_parse_qtr_time_string("2013Q4"), (2013, 4)) + self.assertEqual(_try_parse_qtr_time_string("2013q4"), (2013, 4)) + self.assertEqual(_try_parse_qtr_time_string("13Q4"), (2013, 4)) + self.assertEqual(_try_parse_qtr_time_string("1Q14"), (2014, 1)) + + self.assertEqual( + str(Period(offset.parse_time_string("2013Q4"), + freq=offset)), "2013Q4") + + self.assertEqual(offset.get_period_ordinal( + offset.parse_time_string("2013Q4")), 2013 * 4 + 3) + + self.assertEqual(offset.period_format( + offset.get_period_ordinal( + 
offset.parse_time_string("2013Q4"))), "2013Q4") + + def test_period_asfreq1(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + period = Period("2013-12-27", freq=offset) + + week_offset = Week(weekday=WeekDay.SAT) + self.assertEqual(str(period.asfreq(freq=week_offset, how="E")), + "2013-12-22/2013-12-28") + self.assertEqual(str(period.asfreq(freq=week_offset, how="S")), + "2013-09-29/2013-10-05") + + day = Day() + self.assertEqual(str(period.asfreq(freq=day, how="E")), + "2013-12-28") + + self.assertEqual(str(period.asfreq(freq=day, how="S")), + "2013-09-29") + + def test_period_asfreq2(self): + qtr_offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + week_offset = Week(weekday=WeekDay.SAT) + + period = Period("2013-12-22/2013-12-28", freq=week_offset) + + self.assertEqual(str(period.asfreq(freq=qtr_offset, how="E")), + "2013Q4") + self.assertEqual(str(period.asfreq(freq=qtr_offset, how="S")), + "2013Q4") + + def test_period_range(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + +# prange = period_range('2013Q1', periods=2, freq=offset) + prange = period_range(datetime(2013, 1, 15), periods=2, freq=offset) + + self.assertEqual(len(prange), 2) + self.assertEqual(prange.freq, offset.periodstr) + self.assertEqual(str(prange[0]), '2013Q1') + self.assertEqual(str(prange[1]), '2013Q2') + + def test_period_range_from_ts(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + drange = date_range(datetime(2013, 1, 15), periods=2, freq=offset) + prange = drange.to_period() + + self.assertEqual(len(prange), 2) + self.assertEqual(prange.freq, offset.periodstr) + self.assertEqual(str(prange[0]), '2013Q1') + self.assertEqual(str(prange[1]), '2013Q2') + + def test_periodindex_asfreq(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, 
startingMonth=12, + variation="last", qtr_with_extra_week=4) + + prange = period_range(datetime(2013, 1, 15), periods=2, freq=offset) + + week_offset = Week(weekday=WeekDay.SAT) + + week_end = prange.asfreq(freq=week_offset, how="E") + self.assertEqual(len(week_end), 2) + self.assertEqual(week_end.freq, week_offset.periodstr) + self.assertEqual(str(week_end[0]), '2013-03-24/2013-03-30') + self.assertEqual(str(week_end[1]), '2013-06-23/2013-06-29') + + week_start = prange.asfreq(freq=week_offset, how="S") + self.assertEqual(len(week_start), 2) + self.assertEqual(week_start.freq, week_offset.periodstr) + self.assertEqual(str(week_start[0]), '2012-12-30/2013-01-05') + self.assertEqual(str(week_start[1]), '2013-03-31/2013-04-06') + + day = Day() + day_end = prange.asfreq(freq=day, how="E") + self.assertEqual(len(day_end), 2) + self.assertEqual(day_end.freq, day.periodstr) + self.assertEqual(str(day_end[0]), '2013-03-30') + self.assertEqual(str(day_end[1]), '2013-06-29') + + day_start = prange.asfreq(freq=day, how="S") + self.assertEqual(len(day_start), 2) + self.assertEqual(day_start.freq, day.periodstr) + self.assertEqual(str(day_start[0]), '2012-12-30') + self.assertEqual(str(day_start[1]), '2013-03-31') + + def test_resample_to_weekly(self): + offset = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + prange = period_range(datetime(2013, 1, 15), periods=2, freq=offset) + + df = DataFrame({"A": [1, 2]}, index=prange) + resampled = df.resample(Week(weekday=WeekDay.SAT), fill_method="ffill") + self.assertEquals(len(resampled), 2 * 13) + self.assertEquals(str(resampled.index[0]), '2012-12-30/2013-01-05') + self.assertEquals(str(resampled.index[-1]), '2013-06-23/2013-06-29') + + tm.assert_frame_equal(resampled, + df.resample("W-SAT", fill_method="ffill")) + + assertRaisesRegexp(ValueError, + "cannot be resampled to", + df.resample, + "W-MON", fill_method="ffill") + + def test_resample_to_daily(self): + offset = 
FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + prange = period_range(datetime(2013, 1, 15), periods=2, freq=offset) + + df = DataFrame({"A": [1, 2]}, index=prange) + resampled = df.resample(Day(), fill_method="ffill") + self.assertEquals(len(resampled), 2 * 7 * 13) + self.assertEquals(str(resampled.index[0]), '2012-12-30') + self.assertEquals(str(resampled.index[-1]), '2013-06-29') + + tm.assert_frame_equal(resampled, + df.resample("D", fill_method="ffill")) + + def test_resample_from_weekly(self): + offset_fyq = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + freq_week = Week(weekday=WeekDay.SAT) + + prange = period_range(datetime(2013, 1, 5), + periods=2 * 13, + freq=freq_week) + + df = DataFrame({"A": [1] * 13 + [2] * 13}, index=prange) + resampled = df.resample(offset_fyq, fill_method="mean") + + self.assertEquals(len(resampled), 2) + self.assertEquals(str(resampled.index[0]), '2013Q1') + self.assertEquals(str(resampled.index[-1]), '2013Q2') + self.assertEquals(resampled["A"][0], 1) + self.assertEquals(resampled["A"]["2013Q1"], 1) + self.assertEquals(resampled["A"][1], 2) + self.assertEquals(resampled["A"]["2013Q2"], 2) + + offset_fyq2 = FY5253Quarter(weekday=WeekDay.MON, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + assertRaisesRegexp(ValueError, + "cannot be resampled to", + df.resample, offset_fyq2, fill_method="ffill") + + def test_resample_from_daily(self): + offset_fyq = FY5253Quarter(weekday=WeekDay.SAT, startingMonth=12, + variation="last", qtr_with_extra_week=4) + + prange = period_range(datetime(2012, 12, 30), + periods=2 * 7 * 13, + freq=Day()) + + df = DataFrame({"A": [1] * 13 * 7 + [2] * 13 * 7}, index=prange) + resampled = df.resample(offset_fyq, fill_method="mean") + + self.assertEquals(len(resampled), 2) + self.assertEquals(str(resampled.index[0]), '2013Q1') + self.assertEquals(str(resampled.index[-1]), '2013Q2') + 
self.assertEquals(resampled["A"][0], 1) + self.assertEquals(resampled["A"][1], 2) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index d01ad56165880..28c9d80e1c6d4 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -319,9 +319,11 @@ def _convert_listlike(arg, box, format): return _convert_listlike(np.array([ arg ]), box, format)[0] + class DateParseError(ValueError): pass + def _attempt_YYYYMMDD(arg): """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, arg is a passed in as an object dtype, but could really be ints/strings with nan-like/or floats (e.g. with nan) """ @@ -369,6 +371,34 @@ def calc_with_mask(carg,mask): has_time = re.compile('(.+)([\s]|T)+(.+)') +def _try_parse_qtr_time_string(arg): + arg = arg.upper() + + add_century = False + if len(arg) == 4: + add_century = True + qpats = [(qpat1, 1), (qpat2, 0)] + else: + qpats = [(qpat1full, 1), (qpat2full, 0)] + + for pat, yfirst in qpats: + qparse = pat.match(arg) + if qparse is not None: + if yfirst: + yi, qi = 1, 2 + else: + yi, qi = 2, 1 + q = int(qparse.group(yi)) + y_str = qparse.group(qi) + y = int(y_str) + if add_century: + y += 2000 + + return y, q + + return None + + def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): """ Try hard to parse datetime string, leveraging dateutil plus some extra @@ -389,15 +419,19 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): datetime, datetime/dateutil.parser._result, str """ from pandas.core.config import get_option + from pandas.tseries.frequencies import (_get_rule_month, _month_numbers) from pandas.tseries.offsets import DateOffset - from pandas.tseries.frequencies import (_get_rule_month, _month_numbers, - _get_freq_str) if not isinstance(arg, compat.string_types): return arg arg = arg.upper() + if isinstance(freq, DateOffset): + parsed_dt = 
freq.parse_time_string(arg) + if parsed_dt is not None: + return parsed_dt, parsed_dt, freq.name + default = datetime(1, 1, 1).replace(hour=0, minute=0, second=0, microsecond=0) @@ -408,37 +442,26 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): ret = default.replace(year=int(m.group(1))) return ret, ret, 'year' - add_century = False - if len(arg) == 4: - add_century = True - qpats = [(qpat1, 1), (qpat2, 0)] - else: - qpats = [(qpat1full, 1), (qpat2full, 0)] - - for pat, yfirst in qpats: - qparse = pat.match(arg) - if qparse is not None: - if yfirst: - yi, qi = 1, 2 - else: - yi, qi = 2, 1 - q = int(qparse.group(yi)) - y_str = qparse.group(qi) - y = int(y_str) - if add_century: - y += 2000 - - if freq is not None: - # hack attack, #1228 - mnum = _month_numbers[_get_rule_month(freq)] + 1 - month = (mnum + (q - 1) * 3) % 12 + 1 - if month > mnum: - y -= 1 - else: - month = (q - 1) * 3 + 1 - - ret = default.replace(year=y, month=month) - return ret, ret, 'quarter' + qtr_parsed = _try_parse_qtr_time_string(arg) + if qtr_parsed is not None: + y, q = qtr_parsed + + if freq is not None: + # hack attack, #1228 + month_name = _get_rule_month(freq) + try: + mnum = _month_numbers[month_name] + 1 + except KeyError: + raise DateParseError( + "Do not understand freq: %s" % freq) + month = (mnum + (q - 1) * 3) % 12 + 1 + if month > mnum: + y -= 1 + else: + month = (q - 1) * 3 + 1 + + ret = default.replace(year=y, month=month) + return ret, ret, 'quarter' is_mo_str = freq is not None and freq == 'M' is_mo_off = getattr(freq, 'rule_code', None) == 'M' From 780ad8453f7b23a7b827422abc3ec5086d200724 Mon Sep 17 00:00:00 2001 From: Alex Rothberg Date: Thu, 3 Apr 2014 20:50:27 -0400 Subject: [PATCH 2/4] API: inferred_freq is a DateOffset subclass rather than a string. 
(GH5082) --- doc/source/release.rst | 2 ++ doc/source/v0.14.0.txt | 3 ++- pandas/core/series.py | 2 +- pandas/tseries/frequencies.py | 25 +++++++++++++++++++++++-- pandas/tseries/index.py | 10 +++++----- 5 files changed, 33 insertions(+), 9 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index bb4567f29fce2..a58897a95356b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -193,6 +193,8 @@ API Changes as its already the index - ``DataFrame.plot`` and ``Series.plot`` now supports area plot with specifying ``kind='area'`` (:issue:`6656`) - Line plot can be stacked by ``stacked=True``. (:issue:`6656`) +- ``Panel.shift`` now uses ``NDFrame.shift``. It no longer drops the ``nan`` data and retains its original shape. (:issue:`4867`) +- ``pd.infer_freq`` and ``DatetimeIndex.inferred_freq`` now return a DateOffset subclass rather than a string. (:issue:`5082`) Deprecations ~~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 06bd4ee6e444c..ac64014b81179 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -234,7 +234,8 @@ API changes covs[df.index[-1]] - ``Series.iteritems()`` is now lazy (returns an iterator rather than a list). This was the documented behavior prior to 0.14. (:issue:`6760`) - +- ``Panel.shift`` now uses ``NDFrame.shift``. It no longer drops the ``nan`` data and retains its original shape. (:issue:`4867`) +- ``pd.infer_freq`` and ``DatetimeIndex.inferred_freq`` now return a DateOffset subclass rather than a string. (:issue:`5082`) - Added ``nunique`` and ``value_counts`` functions to ``Index`` for counting unique elements. (:issue:`6734`) - ``stack`` and ``unstack`` now raise a ``ValueError`` when the ``level`` keyword refers to a non-unique item in the ``Index`` (previously raised a ``KeyError``). 
diff --git a/pandas/core/series.py b/pandas/core/series.py index ce3a972b30ac0..d3f8b4c4ed831 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2361,7 +2361,7 @@ def to_period(self, freq=None, copy=True): new_values = new_values.copy() if freq is None: - freq = self.index.freq or self.index.inferred_freq_offset + freq = self.index.freq or self.index.inferred_freq new_index = self.index.to_period(freq=freq) return self._constructor(new_values, index=new_index).__finalize__(self) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index be41e363e870c..0f19aa2b4fa38 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -663,7 +663,7 @@ def infer_freq(index, warn=True): Returns ------- - freq : string or None + freq : DateOffset object or None None if no discernible frequency TypeError if the index is not datetime-like """ @@ -684,7 +684,28 @@ def infer_freq(index, warn=True): index = pd.DatetimeIndex(index) inferer = _FrequencyInferer(index, warn=warn) - return inferer.get_freq() + return to_offset(inferer.get_freq()) + + +def infer_freqstr(index, warn=True): + """ + Infer the most likely frequency given the input index. 
If the frequency is + uncertain, a warning will be printed + + Parameters + ---------- + index : DatetimeIndex + if passed a Series will use the values of the series (NOT THE INDEX) + warn : boolean, default True + + Returns + ------- + freq : string or None + None if no discernible frequency + TypeError if the index is not datetime-like + """ + return infer_freq(index, warn).freqstr + _ONE_MICRO = long(1000) _ONE_MILLI = _ONE_MICRO * 1000 diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index de25fbd8c3baa..01f82cf22d04b 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -14,7 +14,7 @@ from pandas.compat import u from pandas.tseries.frequencies import ( infer_freq, to_offset, get_period_alias, - Resolution, get_reso_string, get_offset) + Resolution, get_reso_string, get_offset, infer_freqstr) from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay from pandas.tseries.tools import parse_time_string, normalize_date from pandas.util.decorators import cache_readonly @@ -1428,10 +1428,10 @@ def inferred_freq(self): return None @cache_readonly - def inferred_freq_offset(self): - if self.inferred_freq is not None: - return get_offset(self.inferred_freq) - else: + def inferred_freqstr(self): + try: + return infer_freqstr(self) + except ValueError: return None @property From 44b26cc29c3ede3f226232f1a8d8018b383d9527 Mon Sep 17 00:00:00 2001 From: Alex Rothberg Date: Sat, 5 Apr 2014 19:34:16 -0400 Subject: [PATCH 3/4] removed freq_obj and changed api for freq --- pandas/tseries/period.py | 79 +++++++++++++-------------- pandas/tseries/resample.py | 2 +- pandas/tseries/tests/test_period.py | 6 +- pandas/tseries/tests/test_plotting.py | 6 +- 4 files changed, 44 insertions(+), 49 deletions(-) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index c85ddab6fcacc..5e3e3e68fd383 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -73,7 +73,7 @@ class Period(PandasObject): minute : int, default 0 
second : int, default 0 """ - __slots__ = ['freq_obj', 'ordinal'] + __slots__ = ['freq', 'ordinal'] _comparables = ['name', 'freqstr'] def __init__(self, value=None, freq=None, ordinal=None, @@ -106,10 +106,10 @@ def __init__(self, value=None, freq=None, ordinal=None, elif isinstance(value, Period): other = value if freq is None \ - or freq == other.freq_obj \ - or _gfc(freq, as_periodstr=True) == _gfc(other.freq_obj, as_periodstr=True): + or freq == other.freq \ + or _gfc(freq, as_periodstr=True) == _gfc(other.freq, as_periodstr=True): self.ordinal = other.ordinal - freq = other.freq_obj + freq = other.freq else: converted = other.asfreq(freq) self.ordinal = converted.ordinal @@ -132,7 +132,7 @@ def __init__(self, value=None, freq=None, ordinal=None, msg = "Value must be Period, string, integer, or datetime" raise ValueError(msg) - self.freq_obj = freq + self.freq = freq _check_freq_mult(freq) if self.ordinal is None: @@ -146,8 +146,8 @@ def __init__(self, value=None, freq=None, ordinal=None, base) @cache_readonly - def freq(self): - return _freq_mod.get_standard_period_freq(self.freq_obj) + def freqstr(self): + return _freq_mod.get_standard_period_freq(self.freq) def __eq__(self, other): if isinstance(other, Period): @@ -218,8 +218,8 @@ def asfreq(self, freq, how='E'): _check_freq_mult(freq) end = how == 'E' - if isinstance(self.freq_obj, _NonCythonPeriod): - value = self.freq_obj.period_asfreq_value(self.ordinal, end) + if isinstance(self.freq, _NonCythonPeriod): + value = self.freq.period_asfreq_value(self.ordinal, end) return Period(value=value, freq=freq) elif isinstance(freq, _NonCythonPeriod): freq_int, _ = _gfc(self.freq) @@ -291,21 +291,20 @@ def now(cls, freq=None): return Period(datetime.now(), freq=freq) def __get_formatted(self, fmt=None): - if isinstance(self.freq_obj, _NonCythonPeriod): - return self.freq_obj.period_format(self.ordinal, fmt=fmt) + if isinstance(self.freq, _NonCythonPeriod): + return self.freq.period_format(self.ordinal, fmt=fmt) 
base, mult = _gfc(self.freq) return tslib.period_format(self.ordinal, base, fmt=fmt) def __repr__(self): - freqstr = _freq_mod.get_standard_period_freq(self.freq_obj) formatted = self.__get_formatted() if not compat.PY3: encoding = com.get_option("display.encoding") formatted = formatted.encode(encoding) - return "Period('%s', '%s')" % (formatted, freqstr) + return "Period('%s', '%s')" % (formatted, self.freqstr) def __unicode__(self): """ @@ -603,7 +602,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, quarter=None, day=None, hour=None, minute=None, second=None, tz=None): - freq_obj = freq + freq_orig = freq if periods is not None: if com.is_float(periods): @@ -618,22 +617,22 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, else: fields = [year, month, quarter, day, hour, minute, second] data, freq = cls._generate_range(start, end, periods, - freq_obj, fields) + freq_orig, fields) else: - ordinal, freq = cls._from_arraylike(data, freq_obj, tz) + ordinal, freq = cls._from_arraylike(data, freq_orig, tz) data = np.array(ordinal, dtype=np.int64, copy=False) subarr = data.view(cls) subarr.name = name - # If freq_obj was initially none, fall back to freq - subarr.freq_obj = freq_obj if freq_obj is not None else freq + # If freq_orig was initially none, fall back to freq + subarr.freq = freq_orig if freq_orig is not None else freq return subarr @cache_readonly - def freq(self): - return _freq_mod.get_standard_period_freq(self.freq_obj) + def freqstr(self): + return _freq_mod.get_standard_period_freq(self.freq) @classmethod def _generate_range(cls, start, end, periods, freq, fields): @@ -766,7 +765,7 @@ def astype(self, dtype): def __iter__(self): for val in self.values: - yield Period(ordinal=val, freq=self.freq_obj) + yield Period(ordinal=val, freq=self.freq) @property def is_all_dates(self): @@ -793,26 +792,22 @@ def factorize(self): uniques = PeriodIndex(ordinal=uniques, freq=self.freq) return labels, uniques 
- @property - def freqstr(self): - return self.freq - def asfreq(self, freq=None, how='E'): how = _validate_end_alias(how) _check_freq_mult(freq) - freq_obj = freq + freq_orig = freq end = how == 'E' - if isinstance(self.freq_obj, _NonCythonPeriod): - new_data = self.freq_obj.period_asfreq_arr( - self.values, freq_obj, end) + if isinstance(self.freq, _NonCythonPeriod): + new_data = self.freq.period_asfreq_arr( + self.values, freq_orig, end) freq = _freq_mod.get_standard_freq(freq) - elif isinstance(freq_obj, _NonCythonPeriod): - freq = freq_obj.periodstr + elif isinstance(freq_orig, _NonCythonPeriod): + freq = freq_orig.periodstr freq_int_from, _ = _gfc(self.freq) - new_data = freq_obj.period_fromfreq_arr( + new_data = freq_orig.period_fromfreq_arr( self.values, freq_int_from, end) else: freq = _freq_mod.get_standard_freq(freq) @@ -823,7 +818,7 @@ def asfreq(self, freq=None, how='E'): result = new_data.view(PeriodIndex) result.name = self.name - result.freq_obj = freq_obj + result.freq = freq_orig return result def to_datetime(self, dayfirst=False): @@ -947,7 +942,7 @@ def get_value(self, series, key): return _maybe_box(self, super(PeriodIndex, self).get_value(s, key), series, key) except (KeyError, IndexError): try: - asdt, parsed, reso = parse_time_string(key, self.freq_obj) + asdt, parsed, reso = parse_time_string(key, self.freq) grp = _freq_mod._infer_period_group(reso) freqn = _freq_mod._period_group(self.freq) @@ -973,7 +968,7 @@ def get_value(self, series, key): except KeyError: pass - key = Period(key, self.freq_obj).ordinal + key = Period(key, self.freq).ordinal return _maybe_box(self, self._engine.get_value(s, key), series, key) def get_loc(self, key): @@ -1088,7 +1083,7 @@ def _wrap_union_result(self, other, result): def _apply_meta(self, rawarr): if not isinstance(rawarr, PeriodIndex): rawarr = rawarr.view(PeriodIndex) - rawarr.freq_obj = self.freq_obj + rawarr.freq = self.freq return rawarr def __getitem__(self, key): @@ -1096,7 +1091,7 @@ def 
__getitem__(self, key): arr_idx = self.view(np.ndarray) if np.isscalar(key): val = arr_idx[key] - return Period(ordinal=val, freq=self.freq_obj) + return Period(ordinal=val, freq=self.freq) else: if com._is_bool_indexer(key): key = np.asarray(key) @@ -1107,9 +1102,9 @@ def __getitem__(self, key): # values = np.asarray(list(values), dtype=object) # return values.reshape(result.shape) - return PeriodIndex(result, name=self.name, freq=self.freq_obj) + return PeriodIndex(result, name=self.name, freq=self.freq) - return PeriodIndex(result, name=self.name, freq=self.freq_obj) + return PeriodIndex(result, name=self.name, freq=self.freq) def _format_with_header(self, header, **kwargs): return header + self._format_native_types(**kwargs) @@ -1128,7 +1123,7 @@ def __array_finalize__(self, obj): if not self.ndim: # pragma: no cover return self.item() - self.freq_obj = getattr(obj, 'freq_obj', None) + self.freq = getattr(obj, 'freq', None) self.name = getattr(obj, 'name', None) self._reset_identity() @@ -1171,7 +1166,7 @@ def take(self, indices, axis=None): indices = com._ensure_platform_int(indices) taken = self.values.take(indices, axis=axis) taken = taken.view(PeriodIndex) - taken.freq_obj = self.freq_obj + taken.freq = self.freq taken.name = self.name return taken @@ -1228,7 +1223,7 @@ def __setstate__(self, state): np.ndarray.__setstate__(self, nd_state) self.name = own_state[0] try: # backcompat - self.freq_obj = own_state[1] + self.freq = own_state[1] except: pass else: # pragma: no cover diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 5cf0a8a98093e..5ac2f4308ed46 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -295,7 +295,7 @@ def _resample_timestamps(self): def _resample_periods(self): # assumes set_grouper(obj) already called axlabels = self.ax - source_freq = axlabels.freq_obj + source_freq = axlabels.freq obj = self.obj if len(axlabels) == 0: diff --git a/pandas/tseries/tests/test_period.py 
b/pandas/tseries/tests/test_period.py index 2fec84f7c0cd1..165a925f64564 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1862,21 +1862,21 @@ def test_to_period_quarterlyish(self): for off in offsets: rng = date_range('01-Jan-2012', periods=8, freq=off) prng = rng.to_period() - self.assertEqual(prng.freq, 'Q-DEC') + self.assertEqual(prng.freqstr, 'Q-DEC') def test_to_period_annualish(self): offsets = ['BA', 'AS', 'BAS'] for off in offsets: rng = date_range('01-Jan-2012', periods=8, freq=off) prng = rng.to_period() - self.assertEqual(prng.freq, 'A-DEC') + self.assertEqual(prng.freqstr, 'A-DEC') def test_to_period_monthish(self): offsets = ['MS', 'EOM', 'BM'] for off in offsets: rng = date_range('01-Jan-2012', periods=8, freq=off) prng = rng.to_period() - self.assertEqual(prng.freq, 'M') + self.assertEqual(prng.freqstr, 'M') def test_no_multiples(self): self.assertRaises(ValueError, period_range, '1989Q3', periods=10, diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py index 5d1e4b67041f7..1f2d4c583f8e3 100644 --- a/pandas/tseries/tests/test_plotting.py +++ b/pandas/tseries/tests/test_plotting.py @@ -676,7 +676,7 @@ def test_mixed_freq_lf_first(self): low.plot() ax = high.plot() for l in ax.get_lines(): - self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, 'T') + self.assertEqual(PeriodIndex(data=l.get_xdata()).freqstr, 'T') def test_mixed_freq_irreg_period(self): ts = tm.makeTimeSeries() @@ -695,7 +695,7 @@ def test_to_weekly_resampling(self): high.plot() ax = low.plot() for l in ax.get_lines(): - self.assert_(PeriodIndex(data=l.get_xdata()).freq.startswith('W')) + self.assert_(PeriodIndex(data=l.get_xdata()).freqstr.startswith('W')) @slow def test_from_weekly_resampling(self): @@ -706,7 +706,7 @@ def test_from_weekly_resampling(self): low.plot() ax = high.plot() for l in ax.get_lines(): - self.assert_(PeriodIndex(data=l.get_xdata()).freq.startswith('W')) + 
self.assert_(PeriodIndex(data=l.get_xdata()).freqstr.startswith('W')) @slow def test_irreg_dtypes(self): From 44a6ab1a29907948f82515b8c058a8cab95a76d1 Mon Sep 17 00:00:00 2001 From: Alex Rothberg Date: Mon, 14 Apr 2014 00:27:20 -0400 Subject: [PATCH 4/4] fix issues with freq change --- doc/source/release.rst | 7 +---- doc/source/v0.14.0.txt | 1 - pandas/tseries/frequencies.py | 5 +++ pandas/tseries/index.py | 2 +- pandas/tseries/offsets.py | 4 ++- pandas/tseries/period.py | 39 ++++++++++++++---------- pandas/tseries/tests/test_frequencies.py | 5 +++ pandas/tseries/tests/test_offsets.py | 12 ++++---- pandas/tseries/tests/test_period.py | 11 +++++++ pandas/tseries/tests/test_plotting.py | 19 ++++++++---- 10 files changed, 68 insertions(+), 37 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index a58897a95356b..e16a12ce0bc91 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -193,8 +193,6 @@ API Changes as its already the index - ``DataFrame.plot`` and ``Series.plot`` now supports area plot with specifying ``kind='area'`` (:issue:`6656`) - Line plot can be stacked by ``stacked=True``. (:issue:`6656`) -- ``Panel.shift`` now uses ``NDFrame.shift``. It no longer drops the ``nan`` data and retains its original shape. (:issue:`4867`) -- ``pd.infer_freq`` and ``DatetimeIndex.inferred_freq`` now return a DateOffset subclass rather than a string. (:issue:`5082`) Deprecations ~~~~~~~~~~~~ @@ -465,11 +463,8 @@ Bug Fixes - Bug causing UnicodeEncodeError when get_dummies called with unicode values and a prefix (:issue:`6885`) - Bug in timeseries-with-frequency plot cursor display (:issue:`5453`) - Bug surfaced in groupby.plot when using a ``Float64Index`` (:issue:`7025`) -<<<<<<< HEAD - Stopped tests from failing if options data isn't able to be downloaded from Yahoo (:issue:`7034`) -======= -- Bug in not correctly treading 'QS', 'BQS', 'BQ' as frquency aliases (:issue:`5028`). 
->>>>>>> ENH: Extends the number of Periods supported by allowing for Python defined Periods. Periods now hold reference to Offset rather than just freqstr (GH5148). +- Bug in not correctly treating 'QS', 'BQS', 'BQ', 'Y' as frequency aliases (:issue:`5028`). pandas 0.13.1 ------------- diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index ac64014b81179..fc561a1f99387 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -234,7 +234,6 @@ API changes covs[df.index[-1]] - ``Series.iteritems()`` is now lazy (returns an iterator rather than a list). This was the documented behavior prior to 0.14. (:issue:`6760`) -- ``Panel.shift`` now uses ``NDFrame.shift``. It no longer drops the ``nan`` data and retains its original shape. (:issue:`4867`) - ``pd.infer_freq`` and ``DatetimeIndex.inferred_freq`` now return a DateOffset subclass rather than a string. (:issue:`5082`) - Added ``nunique`` and ``value_counts`` functions to ``Index`` for counting unique elements. (:issue:`6734`) - ``stack`` and ``unstack`` now raise a ``ValueError`` when the ``level`` keyword refers diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 0f19aa2b4fa38..12a8ac4844552 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -163,6 +163,7 @@ def _get_freq_str(base, mult=1): 'H': 'H', 'Q': 'Q', 'A': 'A', + 'Y': 'A', 'W': 'W', 'M': 'M' } @@ -979,6 +980,8 @@ def is_subperiod(source, target): return source_raw.is_subperiod(target_raw) elif isinstance(target_raw, offsets._NonCythonPeriod): return target_raw.is_superperiod(source_raw) + else: + return False def is_superperiod(source, target): @@ -1038,6 +1041,8 @@ def is_superperiod(source, target): return source_raw.is_superperiod(target_raw) elif isinstance(target_raw, offsets._NonCythonPeriod): return target_raw.is_subperiod(source_raw) + else: + return False def _get_rule_month(source, default='DEC'): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 
01f82cf22d04b..5345fc6f8abcf 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -793,7 +793,7 @@ def to_period(self, freq=None): raise ValueError(msg) if freq is None: # No reason no convert to str; keep w/e freq is - freq = self.freq #get_period_alias(self.freqstr) + freq = self.freq return PeriodIndex(self.values, freq=freq, tz=self.tz) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 04ae0603a0b85..67950587b9026 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -64,7 +64,9 @@ class _NonCythonPeriod(object): """ This class represents the base class for Offsets for which Period logic is not implemented in Cython. This allows fully Python defined Offsets with - Period support + Period support. + All subclasses are expected to implement get_start_dt, get_end_dt, + period_format, get_period_ordinal, is_superperiod and is_subperiod. """ def get_start_dt(self, ordinal): diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 5e3e3e68fd383..3411303188a7d 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -74,7 +74,7 @@ class Period(PandasObject): second : int, default 0 """ __slots__ = ['freq', 'ordinal'] - _comparables = ['name', 'freqstr'] + _comparables = ['name','freqstr'] def __init__(self, value=None, freq=None, ordinal=None, year=None, month=1, quarter=None, day=1, @@ -107,7 +107,7 @@ def __init__(self, value=None, freq=None, ordinal=None, other = value if freq is None \ or freq == other.freq \ - or _gfc(freq, as_periodstr=True) == _gfc(other.freq, as_periodstr=True): + or _gfc(freq, as_periodstr=True) == _gfc(other.freq, as_periodstr=True):#TODO: use freqstr? 
self.ordinal = other.ordinal freq = other.freq else: @@ -131,9 +131,14 @@ def __init__(self, value=None, freq=None, ordinal=None, else: msg = "Value must be Period, string, integer, or datetime" raise ValueError(msg) + + _check_freq_mult(freq) + + #TODO: Fix this + if not isinstance(freq, DateOffset): + freq = _freq_mod._get_freq_str(_gfc(freq)[0]) self.freq = freq - _check_freq_mult(freq) if self.ordinal is None: if isinstance(freq, _NonCythonPeriod): @@ -149,9 +154,12 @@ def __init__(self, value=None, freq=None, ordinal=None, def freqstr(self): return _freq_mod.get_standard_period_freq(self.freq) + def _same_freq(self, other): + return other.freq == self.freq or other.freqstr == self.freqstr + def __eq__(self, other): if isinstance(other, Period): - if other.freq != self.freq: + if not self._same_freq(other): raise ValueError("Cannot compare non-conforming periods") return (self.ordinal == other.ordinal and _gfc(self.freq) == _gfc(other.freq)) @@ -294,7 +302,7 @@ def __get_formatted(self, fmt=None): if isinstance(self.freq, _NonCythonPeriod): return self.freq.period_format(self.ordinal, fmt=fmt) - base, mult = _gfc(self.freq) + base, mult = _gfc(self.freq, as_periodstr=True) return tslib.period_format(self.ordinal, base, fmt=fmt) def __repr__(self): @@ -496,19 +504,15 @@ def _get_ordinals(data, freq): return lib.map_infer(data, f) -def dt64arr_to_periodarr_impl(data, freq, tz): - base, _ = _gfc(freq, as_periodstr=True) - return tslib.dt64arr_to_periodarr(data.view('i8'), base, tz) - - def dt64arr_to_periodarr(data, freq, tz): if data.dtype != np.dtype('M8[ns]'): raise ValueError('Wrong dtype: %s' % data.dtype) if isinstance(freq, _NonCythonPeriod): return freq.dt64arr_to_periodarr(data, tz) - - return dt64arr_to_periodarr_impl(data, freq, tz) + else: + base, _ = _gfc(freq, as_periodstr=True) + return tslib.dt64arr_to_periodarr(data.view('i8'), base, tz) # --- Period index sketch @@ -520,12 +524,12 @@ def _period_index_cmp(opname): def wrapper(self, other): if 
isinstance(other, Period): func = getattr(self.values, opname) - if other.freq != self.freq: + if not other._same_freq(self): raise AssertionError("Frequencies must be equal") result = func(other.ordinal) elif isinstance(other, PeriodIndex): - if other.freq != self.freq: + if not other._same_freq(self): raise AssertionError("Frequencies must be equal") return getattr(self.values, opname)(other.values) else: @@ -634,6 +638,9 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, def freqstr(self): return _freq_mod.get_standard_period_freq(self.freq) + def _same_freq(self, other): + return other.freq == self.freq or other.freqstr == self.freqstr + @classmethod def _generate_range(cls, start, end, periods, freq, fields): field_count = com._count_not_none(*fields) @@ -1129,7 +1136,7 @@ def __array_finalize__(self, obj): def __repr__(self): output = com.pprint_thing(self.__class__) + '\n' - output += 'freq: %s\n' % self.freq + output += 'freq: %s\n' % self.freqstr n = len(self) if n == 1: output += '[%s]\n' % (self[0]) @@ -1146,7 +1153,7 @@ def __unicode__(self): prefix = '' if compat.PY3 else 'u' mapper = "{0}'{{0}}'".format(prefix) output += '[{0}]'.format(', '.join(map(mapper.format, self))) - output += ", freq='{0}'".format(self.freq) + output += ", freq='{0}'".format(self.freqstr) output += ')' return output diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index 896f469f934c6..40ae8f7dc7a11 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ b/pandas/tseries/tests/test_frequencies.py @@ -327,6 +327,11 @@ def test_is_superperiod_subperiod(): assert(fmod.is_superperiod(offsets.Hour(), offsets.Minute())) assert(fmod.is_subperiod(offsets.Minute(), offsets.Hour())) + +def test_get_period_alias_yearly(): + assert fmod.get_period_alias('Y') == fmod.get_period_alias('A') + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git 
a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index c01bd4fcae8df..ecafbfdf3cf22 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -2725,24 +2725,24 @@ def test_rule_code(self): self.assertEqual(alias, get_offset(alias).rule_code) self.assertEqual(alias, (get_offset(alias) * 5).rule_code) - #GH5028 def test_offset_map(self): + #GH5028 for name, offset in compat.iteritems(_offset_map): if name == 'C' and cday is None: continue self.assertEqual(name, None if offset is None else offset.rule_code) - - #GH5028 + def test_many_to_one_mapping(self): - offsets = [ + #GH5028 + offsets = [ QuarterBegin(startingMonth=1), BQuarterBegin(startingMonth=1), BQuarterEnd(startingMonth=12), ] - + for offset in offsets: self.assertEqual(get_offset_name(offset), offset.rule_code) - + def test_aliased_offset_equality(self): self.assertEqual(get_offset("Q"), get_offset("Q")) self.assertEqual(get_offset("Q"), get_offset("Q-DEC")) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 165a925f64564..cb6d75ffe0d70 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1699,6 +1699,11 @@ def test_ts_repr(self): expected = "\nfreq: Q-DEC\n[2013Q1, ..., 2013Q3]\nlength: 3" assert_equal(repr(val), expected) + def test_period_weeklies(self): + p1 = Period('2006-12-31', 'W') + p2 = Period('2006-12-31', '1w') + assert_equal(p1.freq, p2.freq) + def test_period_index_unicode(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') assert_equal(len(pi), 9) @@ -2629,6 +2634,12 @@ def test_resample_from_daily(self): self.assertEquals(resampled["A"][0], 1) self.assertEquals(resampled["A"][1], 2) + def test_freq_to_period(self): + r = pd.date_range('01-Jan-2012', periods=8, freq='QS') + x = r.to_period() + self.assert_("freq='Q-DEC'" in str(x)) + self.assert_("freq: Q-DEC" in repr(x)) + if __name__ == '__main__': import nose 
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py index 1f2d4c583f8e3..d07af679c0d47 100644 --- a/pandas/tseries/tests/test_plotting.py +++ b/pandas/tseries/tests/test_plotting.py @@ -13,6 +13,7 @@ from pandas.tseries.offsets import DateOffset from pandas.tseries.period import period_range, Period, PeriodIndex from pandas.tseries.resample import DatetimeIndex +from pandas.tseries.frequencies import get_period_alias from pandas.util.testing import assert_series_equal, ensure_clean import pandas.util.testing as tm @@ -97,7 +98,7 @@ def test_tsplot(self): f = lambda *args, **kwds: tsplot(s, plt.Axes.plot, *args, **kwds) for s in self.period_ser: - _check_plot_works(f, s.index.freq, ax=ax, series=s) + _check_plot_works(f, s.index.freq, ax=ax, series=s, is_period=True) for s in self.datetime_ser: _check_plot_works(f, s.index.freq.rule_code, ax=ax, series=s) @@ -149,7 +150,7 @@ def check_format_of_first_point(ax, expected_string): @slow def test_line_plot_period_series(self): for s in self.period_ser: - _check_plot_works(s.plot, s.index.freq) + _check_plot_works(s.plot, s.index.freq, is_period=True) @slow def test_line_plot_datetime_series(self): @@ -159,7 +160,7 @@ def test_line_plot_datetime_series(self): @slow def test_line_plot_period_frame(self): for df in self.period_df: - _check_plot_works(df.plot, df.index.freq) + _check_plot_works(df.plot, df.index.freq, is_period=True) @slow def test_line_plot_datetime_frame(self): @@ -924,7 +925,7 @@ def test_mpl_nopandas(self): line2.get_xydata()[:, 0]) -def _check_plot_works(f, freq=None, series=None, *args, **kwargs): +def _check_plot_works(f, freq=None, series=None, is_period=False, *args, **kwargs): import matplotlib.pyplot as plt fig = plt.gcf() @@ -944,10 +945,16 @@ def _check_plot_works(f, freq=None, series=None, *args, **kwargs): if isinstance(dfreq, DateOffset): dfreq = dfreq.rule_code if orig_axfreq is 
None: - assert ax.freq == dfreq + if is_period: + assert get_period_alias(ax.freq) == get_period_alias(dfreq) + else: + assert ax.freq == dfreq if freq is not None and orig_axfreq is None: - assert ax.freq == freq + if is_period: + assert get_period_alias(ax.freq) == get_period_alias(freq) + else: + assert ax.freq == freq ax = fig.add_subplot(212) try: