diff --git a/doc/source/release.rst b/doc/source/release.rst index 53abc22cd02f4..bf946840cdcd8 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -487,6 +487,7 @@ Bug Fixes views; mark ``is_copy`` on ``xs` only if its an actual copy (and not a view) (:issue:`7084`) - Bug in DatetimeIndex creation from string ndarray with ``dayfirst=True`` (:issue:`5917`) - Bug in ``MultiIndex.from_arrays`` created from ``DatetimeIndex`` doesn't preserve ``freq`` and ``tz`` (:issue:`7090`) +- Bug in ``unstack`` raises ``ValueError`` when ``MultiIndex`` contains ``PeriodIndex`` (:issue:`4342`) pandas 0.13.1 ------------- diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 75b95973a0f67..65eadff002eb6 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -18,6 +18,7 @@ import pandas.algos as algos from pandas.core.index import Index, MultiIndex +from pandas.tseries.period import PeriodIndex class _Unstacker(object): @@ -81,8 +82,11 @@ def __init__(self, values, index, level=-1, value_columns=None): labels = index.labels def _make_index(lev, lab): - i = lev.__class__(_make_index_array_level(lev.values, lab)) - i.name = lev.name + if isinstance(lev, PeriodIndex): + i = lev.copy() + else: + i = lev.__class__(_make_index_array_level(lev.values, lab)) + i.name = lev.name return i self.new_index_levels = [_make_index(lev, lab) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 04e9f238d1dbe..0ea0b435a78be 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -13,6 +13,7 @@ from pandas.tseries.period import PeriodIndex from pandas.util.testing import assert_almost_equal import pandas.core.common as com +from pandas.tseries.period import PeriodIndex import pandas.util.testing as tm @@ -183,7 +184,7 @@ def test_empty_print(self): def test_periodindex(self): idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02', - '2014-03', '2014-03'], freq='M') + '2014-03', '2014-03'], freq='M') cat1 = Categorical.from_array(idx1) exp_arr = np.array([0, 0, 1, 1, 2, 2]) @@ -192,8 +193,9 @@ def test_periodindex(self): self.assert_numpy_array_equal(cat1.labels, exp_arr) self.assert_(cat1.levels.equals(exp_idx)) + idx2 = PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01', - '2014-03', '2014-01'], freq='M') + '2014-03', '2014-01'], freq='M') cat2 = Categorical.from_array(idx2) exp_arr = np.array([2, 2, 1, 0, 2, 0]) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 00f7b65f5690e..63bace138884f 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -964,6 +964,86 @@ def test_stack_unstack_multiple(self): expected = self.ymd.unstack(2).unstack(1).dropna(axis=1, how='all') assert_frame_equal(unstacked, expected.ix[:, unstacked.columns]) + def test_unstack_period_series(self): + # GH 4342 + idx1 = pd.PeriodIndex(['2013-01', '2013-01', '2013-02', '2013-02', + '2013-03', '2013-03'], freq='M', name='period') + idx2 = Index(['A', 'B'] * 3, name='str') + value = [1, 2, 3, 4, 5, 6] + + idx = MultiIndex.from_arrays([idx1, idx2]) + s = Series(value, index=idx) + + result1 = s.unstack() + result2 = s.unstack(level=1) + result3 = s.unstack(level=0) + + e_idx = pd.PeriodIndex(['2013-01', '2013-02', '2013-03'], freq='M', name='period') + expected = DataFrame({'A': [1, 3, 5], 'B': [2, 4, 6]}, index=e_idx, + columns=['A', 'B']) + expected.columns.name = 'str' + + assert_frame_equal(result1, expected) + assert_frame_equal(result2, expected) + assert_frame_equal(result3, expected.T) + + idx1 = pd.PeriodIndex(['2013-01', '2013-01', '2013-02', '2013-02', + '2013-03', '2013-03'], freq='M', name='period1') + + idx2 = pd.PeriodIndex(['2013-12', '2013-11', '2013-10', '2013-09', + '2013-08', '2013-07'], freq='M', name='period2') + idx = pd.MultiIndex.from_arrays([idx1, idx2]) + s = Series(value, index=idx) + + result1 = s.unstack() + result2 = s.unstack(level=1) + result3 = s.unstack(level=0) + + e_idx = pd.PeriodIndex(['2013-01', '2013-02', '2013-03'], freq='M', name='period1') + e_cols = pd.PeriodIndex(['2013-07', '2013-08', '2013-09', '2013-10', + '2013-11', '2013-12'], freq='M', name='period2') + expected = DataFrame([[np.nan, np.nan, np.nan, np.nan, 2, 1], + [np.nan, np.nan, 4, 3, np.nan, np.nan], + [6, 5, np.nan, np.nan, np.nan, np.nan]], + index=e_idx, columns=e_cols) + + assert_frame_equal(result1, expected) + assert_frame_equal(result2, expected) + assert_frame_equal(result3, expected.T) + + def test_unstack_period_frame(self): + # GH 4342 + idx1 = pd.PeriodIndex(['2014-01', '2014-02', '2014-02', '2014-02', '2014-01', '2014-01'], + freq='M', name='period1') + idx2 = pd.PeriodIndex(['2013-12', '2013-12', '2014-02', '2013-10', '2013-10', '2014-02'], + freq='M', name='period2') + value = {'A': [1, 2, 3, 4, 5, 6], 'B': [6, 5, 4, 3, 2, 1]} + idx = pd.MultiIndex.from_arrays([idx1, idx2]) + df = pd.DataFrame(value, index=idx) + + result1 = df.unstack() + result2 = df.unstack(level=1) + result3 = df.unstack(level=0) + + e_1 = pd.PeriodIndex(['2014-01', '2014-02'], freq='M', name='period1') + e_2 = pd.PeriodIndex(['2013-10', '2013-12', '2014-02', '2013-10', + '2013-12', '2014-02'], freq='M', name='period2') + e_cols = pd.MultiIndex.from_arrays(['A A A B B B'.split(), e_2]) + expected = DataFrame([[5, 1, 6, 2, 6, 1], [4, 2, 3, 3, 5, 4]], + index=e_1, columns=e_cols) + + assert_frame_equal(result1, expected) + assert_frame_equal(result2, expected) + + e_1 = pd.PeriodIndex(['2014-01', '2014-02', '2014-01', + '2014-02'], freq='M', name='period1') + e_2 = pd.PeriodIndex(['2013-10', '2013-12', '2014-02'], freq='M', name='period2') + e_cols = pd.MultiIndex.from_arrays(['A A B B'.split(), e_1]) + expected = DataFrame([[5, 4, 2, 3], [1, 2, 6, 5], [6, 3, 1, 4]], + index=e_2, columns=e_cols) + + assert_frame_equal(result3, expected) + def test_stack_multiple_bug(self): """ bug when some uniques are not present in the data #3170""" id_col = ([1] * 3) + ([2] * 3) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 43a4d4ff1239b..1a72c7925b6ee 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -2177,7 +2177,7 @@ def test_slice_keep_name(self): def test_factorize(self): idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02', - '2014-03', '2014-03'], freq='M') + '2014-03', '2014-03'], freq='M') exp_arr = np.array([0, 0, 1, 1, 2, 2]) exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')