Skip to content

Commit 224a66d

Browse files
committed
Merge pull request #9292 from behzadnouri/tst-unstack
TST: tests for GH4862, GH7401, GH7403, GH7405
2 parents a774ee8 + e513486 commit 224a66d

File tree

4 files changed

+129
-18
lines changed

4 files changed

+129
-18
lines changed

doc/source/whatsnew/v0.16.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ Bug Fixes
146146
- Fixed bug on big endian platforms which produced incorrect results in ``StataReader`` (:issue:`8688`).
147147

148148
- Bug in ``MultiIndex.has_duplicates`` when having many levels causes an indexer overflow (:issue:`9075`, :issue:`5873`)
149-
- Bug in ``pivot`` and ``unstack`` where ``nan`` values would break index alignment (:issue:`7466`)
149+
- Bug in ``pivot`` and ``unstack`` where ``nan`` values would break index alignment (:issue:`4862`, :issue:`7401`, :issue:`7403`, :issue:`7405`, :issue:`7466`)
150150
- Bug in left ``join`` on multi-index with ``sort=True`` or null values (:issue:`9210`).
151151
- Bug in ``MultiIndex`` where inserting new keys would fail (:issue:`9250`).
152152

pandas/core/reshape.py

+22-16
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import pandas.core.common as com
1818
import pandas.algos as algos
1919

20-
from pandas.core.index import MultiIndex, _get_na_value
20+
from pandas.core.index import MultiIndex
2121

2222

2323
class _Unstacker(object):
@@ -198,14 +198,8 @@ def get_new_values(self):
198198

199199
def get_new_columns(self):
200200
if self.value_columns is None:
201-
if self.lift == 0:
202-
return self.removed_level
203-
204-
lev = self.removed_level
205-
vals = np.insert(lev.astype('object'), 0,
206-
_get_na_value(lev.dtype.type))
207-
208-
return lev._shallow_copy(vals)
201+
return _make_new_index(self.removed_level, None) \
202+
if self.lift != 0 else self.removed_level
209203

210204
stride = len(self.removed_level) + self.lift
211205
width = len(self.value_columns)
@@ -232,19 +226,31 @@ def get_new_index(self):
232226
# construct the new index
233227
if len(self.new_index_levels) == 1:
234228
lev, lab = self.new_index_levels[0], result_labels[0]
235-
if not (lab == -1).any():
236-
return lev.take(lab)
237-
238-
vals = np.insert(lev.astype('object'), len(lev),
239-
_get_na_value(lev.dtype.type)).take(lab)
240-
241-
return lev._shallow_copy(vals)
229+
return _make_new_index(lev, lab) \
230+
if (lab == -1).any() else lev.take(lab)
242231

243232
return MultiIndex(levels=self.new_index_levels,
244233
labels=result_labels,
245234
names=self.new_index_names,
246235
verify_integrity=False)
247236

237+
238+
def _make_new_index(lev, lab):
    """Rebuild the index ``lev`` with one extra missing-value entry.

    The extra entry is whatever ``_get_na_value`` returns for the element
    type of ``lev``.

    * ``lab is None``: the entry is inserted in front of the values of
      ``lev``.
    * otherwise: the entry is appended after the last value and the result
      is reordered with ``take(lab)``; a label of ``-1`` therefore selects
      the appended entry.

    The object-dtype result is then cast back to ``lev.dtype``.  If that cast
    raises ``ValueError``, a plain ``Index`` is built from the object values
    using the keyword arguments from ``lev._get_attributes_dict()``;
    otherwise ``lev._shallow_copy`` is called with the cast values.
    """
    from pandas.core.index import Index, _get_na_value

    na_value = _get_na_value(lev.dtype.type)
    values = lev.values.astype('object')

    if lab is None:
        # No labels supplied: the missing-value entry goes first.
        values = np.insert(values, 0, na_value)
    else:
        # Append the entry at the end so that label -1 resolves to it.
        values = np.insert(values, len(values), na_value).take(lab)

    try:
        values = values.astype(lev.dtype, subok=False, copy=False)
    except ValueError:
        # The original dtype cannot hold the values; keep them as objects.
        return Index(values, **lev._get_attributes_dict())

    return lev._shallow_copy(values)
252+
253+
248254
def _unstack_multiple(data, clocs):
249255
if len(clocs) == 0:
250256
return data

pandas/tests/test_frame.py

+106
Original file line numberDiff line numberDiff line change
@@ -12328,6 +12328,25 @@ def test_unstack_dtypes(self):
1232812328
expected = Series({'float64' : 2, 'object' : 2})
1232912329
assert_series_equal(result, expected)
1233012330

12331+
# GH7405
12332+
for c, d in (np.zeros(5), np.zeros(5)), \
12333+
(np.arange(5, dtype='f8'), np.arange(5, 10, dtype='f8')):
12334+
12335+
df = DataFrame({'A': ['a']*5, 'C':c, 'D':d,
12336+
'B':pd.date_range('2012-01-01', periods=5)})
12337+
12338+
right = df.iloc[:3].copy(deep=True)
12339+
12340+
df = df.set_index(['A', 'B'])
12341+
df['D'] = df['D'].astype('int64')
12342+
12343+
left = df.iloc[:3].unstack(0)
12344+
right = right.set_index(['A', 'B']).unstack(0)
12345+
right[('D', 'a')] = right[('D', 'a')].astype('int64')
12346+
12347+
self.assertEqual(left.shape, (3, 2))
12348+
tm.assert_frame_equal(left, right)
12349+
1233112350
def test_unstack_non_unique_index_names(self):
1233212351
idx = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')],
1233312352
names=['c1', 'c1'])
@@ -12385,6 +12404,93 @@ def verify(df):
1238512404
for col in ['4th', '5th']:
1238612405
verify(udf[col])
1238712406

12407+
# GH7403
12408+
df = pd.DataFrame({'A': list('aaaabbbb'),'B':range(8), 'C':range(8)})
12409+
df.iloc[3, 1] = np.NaN
12410+
left = df.set_index(['A', 'B']).unstack(0)
12411+
12412+
vals = [[3, 0, 1, 2, nan, nan, nan, nan],
12413+
[nan, nan, nan, nan, 4, 5, 6, 7]]
12414+
vals = list(map(list, zip(*vals)))
12415+
idx = Index([nan, 0, 1, 2, 4, 5, 6, 7], name='B')
12416+
cols = MultiIndex(levels=[['C'], ['a', 'b']],
12417+
labels=[[0, 0], [0, 1]],
12418+
names=[None, 'A'])
12419+
12420+
right = DataFrame(vals, columns=cols, index=idx)
12421+
assert_frame_equal(left, right)
12422+
12423+
df = DataFrame({'A': list('aaaabbbb'), 'B':list(range(4))*2,
12424+
'C':range(8)})
12425+
df.iloc[2,1] = np.NaN
12426+
left = df.set_index(['A', 'B']).unstack(0)
12427+
12428+
vals = [[2, nan], [0, 4], [1, 5], [nan, 6], [3, 7]]
12429+
cols = MultiIndex(levels=[['C'], ['a', 'b']],
12430+
labels=[[0, 0], [0, 1]],
12431+
names=[None, 'A'])
12432+
idx = Index([nan, 0, 1, 2, 3], name='B')
12433+
right = DataFrame(vals, columns=cols, index=idx)
12434+
assert_frame_equal(left, right)
12435+
12436+
df = pd.DataFrame({'A': list('aaaabbbb'),'B':list(range(4))*2,
12437+
'C':range(8)})
12438+
df.iloc[3,1] = np.NaN
12439+
left = df.set_index(['A', 'B']).unstack(0)
12440+
12441+
vals = [[3, nan], [0, 4], [1, 5], [2, 6], [nan, 7]]
12442+
cols = MultiIndex(levels=[['C'], ['a', 'b']],
12443+
labels=[[0, 0], [0, 1]],
12444+
names=[None, 'A'])
12445+
idx = Index([nan, 0, 1, 2, 3], name='B')
12446+
right = DataFrame(vals, columns=cols, index=idx)
12447+
assert_frame_equal(left, right)
12448+
12449+
# GH7401
12450+
df = pd.DataFrame({'A': list('aaaaabbbbb'), 'C':np.arange(10),
12451+
'B':date_range('2012-01-01', periods=5).tolist()*2 })
12452+
12453+
df.iloc[3,1] = np.NaN
12454+
left = df.set_index(['A', 'B']).unstack()
12455+
12456+
vals = np.array([[3, 0, 1, 2, nan, 4], [nan, 5, 6, 7, 8, 9]])
12457+
idx = Index(['a', 'b'], name='A')
12458+
cols = MultiIndex(levels=[['C'], date_range('2012-01-01', periods=5)],
12459+
labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
12460+
names=[None, 'B'])
12461+
12462+
right = DataFrame(vals, columns=cols, index=idx)
12463+
assert_frame_equal(left, right)
12464+
12465+
# GH4862
12466+
vals = [['Hg', nan, nan, 680585148],
12467+
['U', 0.0, nan, 680585148],
12468+
['Pb', 7.07e-06, nan, 680585148],
12469+
['Sn', 2.3614e-05, 0.0133, 680607017],
12470+
['Ag', 0.0, 0.0133, 680607017],
12471+
['Hg', -0.00015, 0.0133, 680607017]]
12472+
df = DataFrame(vals, columns=['agent', 'change', 'dosage', 's_id'],
12473+
index=[17263, 17264, 17265, 17266, 17267, 17268])
12474+
12475+
left = df.copy().set_index(['s_id','dosage','agent']).unstack()
12476+
12477+
vals = [[nan, nan, 7.07e-06, nan, 0.0],
12478+
[0.0, -0.00015, nan, 2.3614e-05, nan]]
12479+
12480+
idx = MultiIndex(levels=[[680585148, 680607017], [0.0133]],
12481+
labels=[[0, 1], [-1, 0]],
12482+
names=['s_id', 'dosage'])
12483+
12484+
cols = MultiIndex(levels=[['change'], ['Ag', 'Hg', 'Pb', 'Sn', 'U']],
12485+
labels=[[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]],
12486+
names=[None, 'agent'])
12487+
12488+
right = DataFrame(vals, columns=cols, index=idx)
12489+
assert_frame_equal(left, right)
12490+
12491+
left = df.ix[17264:].copy().set_index(['s_id','dosage','agent'])
12492+
assert_frame_equal(left.unstack(), right)
12493+
1238812494
def test_stack_datetime_column_multiIndex(self):
1238912495
# GH 8039
1239012496
t = datetime(2014, 1, 1)

pandas/tests/test_series.py

-1
Original file line numberDiff line numberDiff line change
@@ -5954,7 +5954,6 @@ def test_unstack(self):
59545954
idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
59555955
ts = pd.Series([1,2], index=idx)
59565956
left = ts.unstack()
5957-
left.columns = left.columns.astype('float64')
59585957
right = DataFrame([[nan, 1], [2, nan]], index=[101, 102],
59595958
columns=[nan, 3.5])
59605959
assert_frame_equal(left, right)

0 commit comments

Comments
 (0)