Skip to content

Commit 49f3616

Browse files
committed
Merge pull request #7041 from sinhrks/unstack
BUG: unstack fails in PeriodIndex
2 parents 19fe376 + 76d33a1 commit 49f3616

File tree

5 files changed

+92
-5
lines changed

5 files changed

+92
-5
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,7 @@ Bug Fixes
487487
views; mark ``is_copy`` on ``xs`` only if it's an actual copy (and not a view) (:issue:`7084`)
488488
- Bug in DatetimeIndex creation from string ndarray with ``dayfirst=True`` (:issue:`5917`)
489489
- Bug in ``MultiIndex.from_arrays`` where a ``MultiIndex`` created from a ``DatetimeIndex`` doesn't preserve ``freq`` and ``tz`` (:issue:`7090`)
490+
- Bug in ``unstack`` raising ``ValueError`` when the ``MultiIndex`` contains a ``PeriodIndex`` (:issue:`4342`)
490491

491492
pandas 0.13.1
492493
-------------

pandas/core/reshape.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import pandas.algos as algos
1919

2020
from pandas.core.index import Index, MultiIndex
21+
from pandas.tseries.period import PeriodIndex
2122

2223

2324
class _Unstacker(object):
@@ -81,8 +82,11 @@ def __init__(self, values, index, level=-1, value_columns=None):
8182
labels = index.labels
8283

8384
def _make_index(lev, lab):
84-
i = lev.__class__(_make_index_array_level(lev.values, lab))
85-
i.name = lev.name
85+
if isinstance(lev, PeriodIndex):
86+
i = lev.copy()
87+
else:
88+
i = lev.__class__(_make_index_array_level(lev.values, lab))
89+
i.name = lev.name
8690
return i
8791

8892
self.new_index_levels = [_make_index(lev, lab)

pandas/tests/test_categorical.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from pandas.tseries.period import PeriodIndex
1414
from pandas.util.testing import assert_almost_equal
1515
import pandas.core.common as com
16+
from pandas.tseries.period import PeriodIndex
1617

1718
import pandas.util.testing as tm
1819

@@ -183,7 +184,7 @@ def test_empty_print(self):
183184

184185
def test_periodindex(self):
185186
idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02',
186-
'2014-03', '2014-03'], freq='M')
187+
'2014-03', '2014-03'], freq='M')
187188
cat1 = Categorical.from_array(idx1)
188189

189190
exp_arr = np.array([0, 0, 1, 1, 2, 2])
@@ -192,8 +193,9 @@ def test_periodindex(self):
192193
self.assert_numpy_array_equal(cat1.labels, exp_arr)
193194
self.assert_(cat1.levels.equals(exp_idx))
194195

196+
195197
idx2 = PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01',
196-
'2014-03', '2014-01'], freq='M')
198+
'2014-03', '2014-01'], freq='M')
197199
cat2 = Categorical.from_array(idx2)
198200

199201
exp_arr = np.array([2, 2, 1, 0, 2, 0])

pandas/tests/test_multilevel.py

+80
Original file line numberDiff line numberDiff line change
@@ -964,6 +964,86 @@ def test_stack_unstack_multiple(self):
964964
expected = self.ymd.unstack(2).unstack(1).dropna(axis=1, how='all')
965965
assert_frame_equal(unstacked, expected.ix[:, unstacked.columns])
966966

967+
def test_unstack_period_series(self):
968+
# GH 4342
969+
idx1 = pd.PeriodIndex(['2013-01', '2013-01', '2013-02', '2013-02',
970+
'2013-03', '2013-03'], freq='M', name='period')
971+
idx2 = Index(['A', 'B'] * 3, name='str')
972+
value = [1, 2, 3, 4, 5, 6]
973+
974+
idx = MultiIndex.from_arrays([idx1, idx2])
975+
s = Series(value, index=idx)
976+
977+
result1 = s.unstack()
978+
result2 = s.unstack(level=1)
979+
result3 = s.unstack(level=0)
980+
981+
e_idx = pd.PeriodIndex(['2013-01', '2013-02', '2013-03'], freq='M', name='period')
982+
expected = DataFrame({'A': [1, 3, 5], 'B': [2, 4, 6]}, index=e_idx,
983+
columns=['A', 'B'])
984+
expected.columns.name = 'str'
985+
986+
assert_frame_equal(result1, expected)
987+
assert_frame_equal(result2, expected)
988+
assert_frame_equal(result3, expected.T)
989+
990+
idx1 = pd.PeriodIndex(['2013-01', '2013-01', '2013-02', '2013-02',
991+
'2013-03', '2013-03'], freq='M', name='period1')
992+
993+
idx2 = pd.PeriodIndex(['2013-12', '2013-11', '2013-10', '2013-09',
994+
'2013-08', '2013-07'], freq='M', name='period2')
995+
idx = pd.MultiIndex.from_arrays([idx1, idx2])
996+
s = Series(value, index=idx)
997+
998+
result1 = s.unstack()
999+
result2 = s.unstack(level=1)
1000+
result3 = s.unstack(level=0)
1001+
1002+
e_idx = pd.PeriodIndex(['2013-01', '2013-02', '2013-03'], freq='M', name='period1')
1003+
e_cols = pd.PeriodIndex(['2013-07', '2013-08', '2013-09', '2013-10',
1004+
'2013-11', '2013-12'], freq='M', name='period2')
1005+
expected = DataFrame([[np.nan, np.nan, np.nan, np.nan, 2, 1],
1006+
[np.nan, np.nan, 4, 3, np.nan, np.nan],
1007+
[6, 5, np.nan, np.nan, np.nan, np.nan]],
1008+
index=e_idx, columns=e_cols)
1009+
1010+
assert_frame_equal(result1, expected)
1011+
assert_frame_equal(result2, expected)
1012+
assert_frame_equal(result3, expected.T)
1013+
1014+
def test_unstack_period_frame(self):
1015+
# GH 4342
1016+
idx1 = pd.PeriodIndex(['2014-01', '2014-02', '2014-02', '2014-02', '2014-01', '2014-01'],
1017+
freq='M', name='period1')
1018+
idx2 = pd.PeriodIndex(['2013-12', '2013-12', '2014-02', '2013-10', '2013-10', '2014-02'],
1019+
freq='M', name='period2')
1020+
value = {'A': [1, 2, 3, 4, 5, 6], 'B': [6, 5, 4, 3, 2, 1]}
1021+
idx = pd.MultiIndex.from_arrays([idx1, idx2])
1022+
df = pd.DataFrame(value, index=idx)
1023+
1024+
result1 = df.unstack()
1025+
result2 = df.unstack(level=1)
1026+
result3 = df.unstack(level=0)
1027+
1028+
e_1 = pd.PeriodIndex(['2014-01', '2014-02'], freq='M', name='period1')
1029+
e_2 = pd.PeriodIndex(['2013-10', '2013-12', '2014-02', '2013-10',
1030+
'2013-12', '2014-02'], freq='M', name='period2')
1031+
e_cols = pd.MultiIndex.from_arrays(['A A A B B B'.split(), e_2])
1032+
expected = DataFrame([[5, 1, 6, 2, 6, 1], [4, 2, 3, 3, 5, 4]],
1033+
index=e_1, columns=e_cols)
1034+
1035+
assert_frame_equal(result1, expected)
1036+
assert_frame_equal(result2, expected)
1037+
1038+
e_1 = pd.PeriodIndex(['2014-01', '2014-02', '2014-01',
1039+
'2014-02'], freq='M', name='period1')
1040+
e_2 = pd.PeriodIndex(['2013-10', '2013-12', '2014-02'], freq='M', name='period2')
1041+
e_cols = pd.MultiIndex.from_arrays(['A A B B'.split(), e_1])
1042+
expected = DataFrame([[5, 4, 2, 3], [1, 2, 6, 5], [6, 3, 1, 4]],
1043+
index=e_2, columns=e_cols)
1044+
1045+
assert_frame_equal(result3, expected)
1046+
9671047
def test_stack_multiple_bug(self):
9681048
""" bug when some uniques are not present in the data #3170"""
9691049
id_col = ([1] * 3) + ([2] * 3)

pandas/tseries/tests/test_period.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2177,7 +2177,7 @@ def test_slice_keep_name(self):
21772177

21782178
def test_factorize(self):
21792179
idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02',
2180-
'2014-03', '2014-03'], freq='M')
2180+
'2014-03', '2014-03'], freq='M')
21812181

21822182
exp_arr = np.array([0, 0, 1, 1, 2, 2])
21832183
exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')

0 commit comments

Comments
 (0)