Skip to content

Commit d531718

Browse files
kordek, jreback
authored and committed
BUG: GH11847 Unstack with mixed dtypes coerces everything to object
Closes #11847. Changed the way in which the original data frame is copied (dropped use of .values, since it does not preserve dtypes). Author: Pawel Kordek <[email protected]>. Closes #14053 from kordek/#11847 and squashes the following commits: 6a381ce [Pawel Kordek] BUG: GH11847 Unstack with mixed dtypes coerces everything to object
1 parent 34807fc commit d531718

File tree

3 files changed

+46
-2
lines changed

3 files changed

+46
-2
lines changed

doc/source/whatsnew/v0.19.2.txt

+2
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,5 @@ Bug Fixes
7676

7777

7878
- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`)
79+
80+
- Bug in ``unstack()`` where, if called with a list of column(s) as an argument, all columns were coerced to ``object`` regardless of their dtypes (:issue:`11847`)

pandas/core/reshape.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,8 @@ def _unstack_multiple(data, clocs):
277277
verify_integrity=False)
278278

279279
if isinstance(data, Series):
280-
dummy = Series(data.values, index=dummy_index)
280+
dummy = data.copy()
281+
dummy.index = dummy_index
281282
unstacked = dummy.unstack('__placeholder__')
282283
new_levels = clevels
283284
new_names = cnames
@@ -292,7 +293,8 @@ def _unstack_multiple(data, clocs):
292293

293294
return result
294295

295-
dummy = DataFrame(data.values, index=dummy_index, columns=data.columns)
296+
dummy = data.copy()
297+
dummy.index = dummy_index
296298

297299
unstacked = dummy.unstack('__placeholder__')
298300
if isinstance(unstacked, Series):

pandas/tests/frame/test_reshape.py

+40
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,46 @@ def test_unstack_fill_frame_categorical(self):
282282
index=list('xyz'))
283283
assert_frame_equal(result, expected)
284284

285+
def test_unstack_preserve_dtypes(self):
286+
# Checks fix for #11847
287+
df = pd.DataFrame(dict(state=['IL', 'MI', 'NC'],
288+
index=['a', 'b', 'c'],
289+
some_categories=pd.Series(['a', 'b', 'c']
290+
).astype('category'),
291+
A=np.random.rand(3),
292+
B=1,
293+
C='foo',
294+
D=pd.Timestamp('20010102'),
295+
E=pd.Series([1.0, 50.0, 100.0]
296+
).astype('float32'),
297+
F=pd.Series([3.0, 4.0, 5.0]).astype('float64'),
298+
G=False,
299+
H=pd.Series([1, 200, 923442], dtype='int8')))
300+
301+
def unstack_and_compare(df, column_name):
302+
unstacked1 = df.unstack([column_name])
303+
unstacked2 = df.unstack(column_name)
304+
assert_frame_equal(unstacked1, unstacked2)
305+
306+
df1 = df.set_index(['state', 'index'])
307+
unstack_and_compare(df1, 'index')
308+
309+
df1 = df.set_index(['state', 'some_categories'])
310+
unstack_and_compare(df1, 'some_categories')
311+
312+
df1 = df.set_index(['F', 'C'])
313+
unstack_and_compare(df1, 'F')
314+
315+
df1 = df.set_index(['G', 'B', 'state'])
316+
unstack_and_compare(df1, 'B')
317+
318+
df1 = df.set_index(['E', 'A'])
319+
unstack_and_compare(df1, 'E')
320+
321+
df1 = df.set_index(['state', 'index'])
322+
s = df1['A']
323+
unstack_and_compare(s, 'index')
324+
285325
def test_stack_ints(self):
286326
columns = MultiIndex.from_tuples(list(itertools.product(range(3),
287327
repeat=3)))

0 commit comments

Comments
 (0)