Skip to content

Commit 40f5d2f

Browse files
committed
BUG: GH11847 Unstack with mixed dtypes coerces everything to object
Changed the way in which the original data frame is copied (dropped use of .values, since it does not preserve dtypes).
1 parent 7a2bcb6 commit 40f5d2f

File tree

3 files changed

+46
-2
lines changed

3 files changed

+46
-2
lines changed

doc/source/whatsnew/v0.20.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,5 @@ Performance Improvements
8080

8181
Bug Fixes
8282
~~~~~~~~~
83+
84+
- Bug in ``unstack()`` where, when called with a list of column(s) as the argument, all columns were coerced to ``object`` regardless of their dtypes (:issue:`11847`)

pandas/core/reshape.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,8 @@ def _unstack_multiple(data, clocs):
277277
verify_integrity=False)
278278

279279
if isinstance(data, Series):
280-
dummy = Series(data.values, index=dummy_index)
280+
dummy = data.copy()
281+
dummy.index = dummy_index
281282
unstacked = dummy.unstack('__placeholder__')
282283
new_levels = clevels
283284
new_names = cnames
@@ -292,7 +293,8 @@ def _unstack_multiple(data, clocs):
292293

293294
return result
294295

295-
dummy = DataFrame(data.values, index=dummy_index, columns=data.columns)
296+
dummy = data.copy()
297+
dummy.index = dummy_index
296298

297299
unstacked = dummy.unstack('__placeholder__')
298300
if isinstance(unstacked, Series):

pandas/tests/frame/test_reshape.py

+40
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,46 @@ def test_unstack_fill_frame_categorical(self):
282282
index=list('xyz'))
283283
assert_frame_equal(result, expected)
284284

285+
def test_unstack_preserve_dtypes(self):
286+
# Checks fix for #11847
287+
df = pd.DataFrame(dict(state=['IL', 'MI', 'NC'],
288+
index=['a', 'b', 'c'],
289+
A=np.random.rand(3),
290+
B=1,
291+
C='foo',
292+
D=pd.Timestamp('20010102'),
293+
E=pd.Series([1.0, 50.0, 100.0]
294+
).astype('float32'),
295+
F=pd.Series([3.0, 4.0, 5.0]).astype('float64'),
296+
G=False,
297+
H=pd.Series([1, 200, 923442], dtype='int8')))
298+
299+
df1 = df.set_index(['state', 'index'])
300+
unstacked1 = df1.unstack(['index'])
301+
unstacked2 = df1.unstack('index')
302+
assert_frame_equal(unstacked1, unstacked2)
303+
304+
df1 = df.set_index(['F', 'C'])
305+
unstacked1 = df1.unstack(['F'])
306+
unstacked2 = df1.unstack('F')
307+
assert_frame_equal(unstacked1, unstacked2)
308+
309+
df1 = df.set_index(['G', 'B', 'state'])
310+
unstacked1 = df1.unstack(['B'])
311+
unstacked2 = df1.unstack('B')
312+
assert_frame_equal(unstacked1, unstacked2)
313+
314+
df1 = df.set_index(['E', 'A'])
315+
unstacked1 = df1.unstack(['E'])
316+
unstacked2 = df1.unstack('E')
317+
assert_frame_equal(unstacked1, unstacked2)
318+
319+
df1 = df.set_index(['state', 'index'])
320+
s = df1['A']
321+
unstacked1 = s.unstack(['index'])
322+
unstacked2 = s.unstack('index')
323+
assert_frame_equal(unstacked1, unstacked2)
324+
285325
def test_stack_ints(self):
286326
columns = MultiIndex.from_tuples(list(itertools.product(range(3),
287327
repeat=3)))

0 commit comments

Comments
 (0)