From a1072948e1e64928ac397785c3134bc62fb62768 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 13 Mar 2016 20:05:17 +0900 Subject: [PATCH] BUG: Mixed period cannot be displayed --- doc/source/whatsnew/v0.18.1.txt | 5 ++-- pandas/core/format.py | 8 +++++- pandas/src/period.pyx | 12 ++++++--- pandas/tests/test_format.py | 35 +++++++++++++++++++++++++ pandas/tseries/common.py | 1 + pandas/tseries/period.py | 26 +++++++++---------- pandas/tseries/tests/test_base.py | 40 +++++++++++++++-------------- pandas/tseries/tests/test_period.py | 34 ++++++++++++++---------- 8 files changed, 109 insertions(+), 52 deletions(-) diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index b7a0cf888f1a2..1a3ee89562cc3 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -44,7 +44,7 @@ Enhancements API changes ~~~~~~~~~~~ - +- ``Period`` and ``PeriodIndex`` now raise an ``IncompatibleFrequency`` error, which inherits from ``ValueError``, rather than a raw ``ValueError`` (:issue:`12615`) @@ -130,5 +130,6 @@ Bug Fixes - - Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`) +- Bug in printing data containing ``Period`` objects with different ``freq``, which raised ``ValueError`` (:issue:`12615`) + diff --git a/pandas/core/format.py b/pandas/core/format.py index 1f1ff73869f73..16a870cbc6901 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -2235,7 +2235,13 @@ def _format_strings(self): class PeriodArrayFormatter(IntArrayFormatter): def _format_strings(self): - values = PeriodIndex(self.values).to_native_types() + from pandas.tseries.period import IncompatibleFrequency + try: + values = PeriodIndex(self.values).to_native_types() + except IncompatibleFrequency: + # periods may contain different freqs + values = Index(self.values, dtype='object').to_native_types() + formatter = self.formatter or (lambda x: '%s' % x) fmt_values = [formatter(x) for x in values] return 
fmt_values diff --git a/pandas/src/period.pyx b/pandas/src/period.pyx index 48c017c43c0aa..33c213ac5d8df 100644 --- a/pandas/src/period.pyx +++ b/pandas/src/period.pyx @@ -452,7 +452,8 @@ def extract_ordinals(ndarray[object] values, freq): p = values[i] ordinals[i] = p.ordinal if p.freqstr != freqstr: - raise ValueError(_DIFFERENT_FREQ_INDEX.format(freqstr, p.freqstr)) + msg = _DIFFERENT_FREQ_INDEX.format(freqstr, p.freqstr) + raise IncompatibleFrequency(msg) return ordinals @@ -627,6 +628,11 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, _DIFFERENT_FREQ = "Input has different freq={1} from Period(freq={0})" _DIFFERENT_FREQ_INDEX = "Input has different freq={1} from PeriodIndex(freq={0})" + +class IncompatibleFrequency(ValueError): + pass + + cdef class Period(object): """ Represents an period of time @@ -768,7 +774,7 @@ cdef class Period(object): from pandas.tseries.frequencies import get_freq_code as _gfc if other.freq != self.freq: msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) - raise ValueError(msg) + raise IncompatibleFrequency(msg) if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: return _nat_scalar_rules[op] return PyObject_RichCompareBool(self.ordinal, other.ordinal, op) @@ -809,7 +815,7 @@ cdef class Period(object): ordinal = self.ordinal + other.n return Period(ordinal=ordinal, freq=self.freq) msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) - raise ValueError(msg) + raise IncompatibleFrequency(msg) else: # pragma no cover return NotImplemented diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 6772c1ee4b1ee..7b1138db6c08d 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -3151,6 +3151,20 @@ def test_to_csv_engine_kw_deprecation(self): df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) df.to_csv(engine='python') + def test_period(self): + # GH 12615 + df = pd.DataFrame({'A': pd.period_range('2013-01', + periods=4, freq='M'), + 
'B': [pd.Period('2011-01', freq='M'), + pd.Period('2011-02-01', freq='D'), + pd.Period('2011-03-01 09:00', freq='H'), + pd.Period('2011-04', freq='M')], + 'C': list('abcd')}) + exp = (" A B C\n0 2013-01 2011-01 a\n" + "1 2013-02 2011-02-01 b\n2 2013-03 2011-03-01 09:00 c\n" + "3 2013-04 2011-04 d") + self.assertEqual(str(df), exp) + class TestSeriesFormatting(tm.TestCase): _multiprocess_can_split_ = True @@ -3481,6 +3495,27 @@ def test_mixed_datetime64(self): result = repr(df.ix[0]) self.assertTrue('2012-01-01' in result) + def test_period(self): + # GH 12615 + index = pd.period_range('2013-01', periods=6, freq='M') + s = Series(np.arange(6), index=index) + exp = ("2013-01 0\n2013-02 1\n2013-03 2\n2013-04 3\n" + "2013-05 4\n2013-06 5\nFreq: M, dtype: int64") + self.assertEqual(str(s), exp) + + s = Series(index) + exp = ("0 2013-01\n1 2013-02\n2 2013-03\n3 2013-04\n" + "4 2013-05\n5 2013-06\ndtype: object") + self.assertEqual(str(s), exp) + + # periods with mixed freq + s = Series([pd.Period('2011-01', freq='M'), + pd.Period('2011-02-01', freq='D'), + pd.Period('2011-03-01 09:00', freq='H')]) + exp = ("0 2011-01\n1 2011-02-01\n" + "2 2011-03-01 09:00\ndtype: object") + self.assertEqual(str(s), exp) + def test_max_multi_index_display(self): # GH 7101 diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 5c31d79dc6780..aa50d0e581e99 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -6,6 +6,7 @@ from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.core import common as com from pandas.tseries.index import DatetimeIndex +from pandas._period import IncompatibleFrequency # flake8: noqa from pandas.tseries.period import PeriodIndex from pandas.tseries.tdi import TimedeltaIndex from pandas import tslib diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index df04984bcb582..798df0b9e31bd 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -8,13 +8,10 @@ from pandas.tseries.tools 
import parse_time_string import pandas.tseries.offsets as offsets -from pandas._period import Period import pandas._period as period -from pandas._period import ( - get_period_field_arr, - _validate_end_alias, - _quarter_to_myear, -) +from pandas._period import (Period, IncompatibleFrequency, + get_period_field_arr, _validate_end_alias, + _quarter_to_myear) import pandas.core.common as com from pandas.core.common import (isnull, _INT64_DTYPE, _maybe_box, @@ -69,13 +66,13 @@ def wrapper(self, other): other_base, _ = _gfc(other.freq) if other.freq != self.freq: msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) - raise ValueError(msg) + raise IncompatibleFrequency(msg) result = func(other.ordinal) elif isinstance(other, PeriodIndex): if other.freq != self.freq: msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) - raise ValueError(msg) + raise IncompatibleFrequency(msg) result = getattr(self.values, opname)(other.values) @@ -392,7 +389,7 @@ def searchsorted(self, key, side='left'): if isinstance(key, Period): if key.freq != self.freq: msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, key.freqstr) - raise ValueError(msg) + raise IncompatibleFrequency(msg) key = key.ordinal elif isinstance(key, compat.string_types): key = Period(key, freq=self.freq).ordinal @@ -573,6 +570,8 @@ def _maybe_convert_timedelta(self, other): base = frequencies.get_base_alias(freqstr) if base == self.freq.rule_code: return other.n + msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) + raise IncompatibleFrequency(msg) elif isinstance(other, np.ndarray): if com.is_integer_dtype(other): return other @@ -583,8 +582,9 @@ def _maybe_convert_timedelta(self, other): offset_nanos = tslib._delta_to_nanoseconds(offset) if (nanos % offset_nanos).all() == 0: return nanos // offset_nanos + # raise when input doesn't have freq msg = "Input has different freq from PeriodIndex(freq={0})" - raise ValueError(msg.format(self.freqstr)) + raise 
IncompatibleFrequency(msg.format(self.freqstr)) def _add_delta(self, other): ordinal_delta = self._maybe_convert_timedelta(other) @@ -663,8 +663,8 @@ def get_value(self, series, key): def get_indexer(self, target, method=None, limit=None, tolerance=None): if hasattr(target, 'freq') and target.freq != self.freq: - raise ValueError('target and index have different freq: ' - '(%s, %s)' % (target.freq, self.freq)) + msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, target.freqstr) + raise IncompatibleFrequency(msg) return Index.get_indexer(self, target, method, limit, tolerance) def get_loc(self, key, method=None, tolerance=None): @@ -801,7 +801,7 @@ def _assert_can_do_setop(self, other): if self.freq != other.freq: msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) - raise ValueError(msg) + raise IncompatibleFrequency(msg) def _wrap_union_result(self, other, result): name = self.name if self.name == other.name else None diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 7ddf3354324f9..3c35fc8299517 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -6,6 +6,7 @@ PeriodIndex, TimedeltaIndex, Timedelta, timedelta_range, date_range, Float64Index) import pandas.tslib as tslib +import pandas.tseries.period as period import pandas.util.testing as tm @@ -1617,9 +1618,9 @@ def test_add_iadd(self): for o in [pd.offsets.YearBegin(2), pd.offsets.MonthBegin(1), pd.offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365), Timedelta(days=365)]: - msg = 'Input has different freq from PeriodIndex\\(freq=A-DEC\\)' - with tm.assertRaisesRegexp(ValueError, - 'Input has different freq from Period'): + msg = ('Input has different freq(=.+)? 
' + 'from PeriodIndex\\(freq=A-DEC\\)') + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): rng + o rng = pd.period_range('2014-01', '2016-12', freq='M') @@ -1633,8 +1634,8 @@ def test_add_iadd(self): pd.offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365), Timedelta(days=365)]: rng = pd.period_range('2014-01', '2016-12', freq='M') - msg = 'Input has different freq from PeriodIndex\\(freq=M\\)' - with tm.assertRaisesRegexp(ValueError, msg): + msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)' + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): rng + o # Tick @@ -1654,8 +1655,8 @@ def test_add_iadd(self): pd.offsets.Minute(), np.timedelta64(4, 'h'), timedelta(hours=23), Timedelta('23:00:00')]: rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') - msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' - with tm.assertRaisesRegexp(ValueError, msg): + msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)' + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): rng + o offsets = [pd.offsets.Hour(2), timedelta(hours=2), @@ -1676,10 +1677,10 @@ def test_add_iadd(self): np.timedelta64(30, 's'), Timedelta(seconds=30)]: rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') - msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' - with tm.assertRaisesRegexp(ValueError, msg): + msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=H\\)' + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): result = rng + delta - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): rng += delta # int @@ -1745,8 +1746,9 @@ def test_sub_isub(self): pd.offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: rng = pd.period_range('2014', '2024', freq='A') - msg = 'Input has different freq from PeriodIndex\\(freq=A-DEC\\)' - with tm.assertRaisesRegexp(ValueError, msg): + msg = ('Input has different freq(=.+)? 
' + 'from PeriodIndex\\(freq=A-DEC\\)') + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): rng - o rng = pd.period_range('2014-01', '2016-12', freq='M') @@ -1760,8 +1762,8 @@ def test_sub_isub(self): pd.offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: rng = pd.period_range('2014-01', '2016-12', freq='M') - msg = 'Input has different freq from PeriodIndex\\(freq=M\\)' - with tm.assertRaisesRegexp(ValueError, msg): + msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)' + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): rng - o # Tick @@ -1780,8 +1782,8 @@ def test_sub_isub(self): pd.offsets.Minute(), np.timedelta64(4, 'h'), timedelta(hours=23)]: rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') - msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' - with tm.assertRaisesRegexp(ValueError, msg): + msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)' + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): rng - o offsets = [pd.offsets.Hour(2), timedelta(hours=2), @@ -1801,10 +1803,10 @@ def test_sub_isub(self): np.timedelta64(30, 's')]: rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') - msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' - with tm.assertRaisesRegexp(ValueError, msg): + msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=H\\)' + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): result = rng + delta - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): rng += delta # int diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index e8af63f3355c9..e0dad2995f91c 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -2886,12 +2886,16 @@ def test_union(self): # raise if different frequencies index = period_range('1/1/2000', '1/20/2000', freq='D') index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - self.assertRaises(ValueError, index.union, index2) + with tm.assertRaises(period.IncompatibleFrequency): + index.union(index2) - self.assertRaises(ValueError, index.join, index.to_timestamp()) + msg = 'can only call with other PeriodIndex-ed objects' + with tm.assertRaisesRegexp(ValueError, msg): + index.join(index.to_timestamp()) index3 = period_range('1/1/2000', '1/20/2000', freq='2D') - self.assertRaises(ValueError, index.join, index3) + with tm.assertRaises(period.IncompatibleFrequency): + index.join(index3) def test_union_dataframe_index(self): rng1 = pd.period_range('1/1/1999', '1/1/2012', freq='M') @@ -2919,10 +2923,12 @@ def test_intersection(self): # raise if different frequencies index = period_range('1/1/2000', '1/20/2000', freq='D') index2 = period_range('1/1/2000', '1/20/2000', freq='W-WED') - self.assertRaises(ValueError, index.intersection, index2) + with tm.assertRaises(period.IncompatibleFrequency): + index.intersection(index2) index3 = period_range('1/1/2000', '1/20/2000', freq='2D') - self.assertRaises(ValueError, index.intersection, index3) + with tm.assertRaises(period.IncompatibleFrequency): + index.intersection(index3) def test_intersection_cases(self): base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx') @@ -3213,11 +3219,11 @@ def test_searchsorted(self): 
self.assertEqual(pidx.searchsorted(p2), 3) msg = "Input has different freq=H from PeriodIndex" - with self.assertRaisesRegexp(ValueError, msg): + with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): pidx.searchsorted(pd.Period('2014-01-01', freq='H')) msg = "Input has different freq=5D from PeriodIndex" - with self.assertRaisesRegexp(ValueError, msg): + with self.assertRaisesRegexp(period.IncompatibleFrequency, msg): pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) def test_round_trip(self): @@ -3535,7 +3541,7 @@ def test_pi_ops_array(self): # incompatible freq msg = "Input has different freq from PeriodIndex\(freq=M\)" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): idx + np.array([np.timedelta64(1, 'D')] * 4) idx = PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00', 'NaT', @@ -3551,7 +3557,7 @@ def test_pi_ops_array(self): self.assert_index_equal(result, exp) msg = "Input has different freq from PeriodIndex\(freq=H\)" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): idx + np.array([np.timedelta64(1, 's')] * 4) idx = PeriodIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', 'NaT', @@ -3754,7 +3760,7 @@ def test_pi_pi_comp(self): # different base freq msg = "Input has different freq=A-DEC from PeriodIndex" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): base <= Period('2011', freq='A') with tm.assertRaisesRegexp(ValueError, msg): @@ -3763,10 +3769,10 @@ def test_pi_pi_comp(self): # different mult msg = "Input has different freq=4M from PeriodIndex" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): base <= Period('2011', freq='4M') - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): idx = PeriodIndex(['2011', '2012', '2013', '2014'], 
freq='4M') base <= idx @@ -3812,9 +3818,9 @@ def test_pi_nat_comp(self): diff = PeriodIndex( ['2011-02', '2011-01', '2011-04', 'NaT'], freq='4M') msg = "Input has different freq=4M from PeriodIndex" - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): idx1 > diff - with tm.assertRaisesRegexp(ValueError, msg): + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): idx1 == diff