Skip to content

Commit eaf38f9

Browse files
committed
BUG: handle mixed-type frames carefully in unstack, GH #403
1 parent 194db26 commit eaf38f9

File tree

4 files changed

+67
-14
lines changed

4 files changed

+67
-14
lines changed

RELEASE.rst

+8-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ pandas 0.6.0
2727

2828
**Release date:** Not yet released
2929

30+
**API Changes**
31+
32+
- Arithmetic methods like `sum` will attempt to sum dtype=object values by
33+
default instead of excluding them (GH #382)
34+
3035
**New features / modules**
3136

3237
- Add `melt` function to `pandas.core.reshape`
@@ -146,7 +151,9 @@ pandas 0.6.0
146151
depending on whether the passed function is a reduction (GH #389)
147152
- Always return NA/NaN from Series.min/max and DataFrame.min/max when all
148153
values in a row/column are NA (GH #384)
149-
154+
- Enable partial setting with .ix / advanced indexing (GH #397)
155+
- Handle mixed-type DataFrames correctly in unstack so that type
156+
information is not lost (GH #403)
150157

151158
Thanks
152159
------

pandas/core/frame.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,9 @@
8383

8484
_doc_exclude_na = "NA/null values are excluded"
8585

86-
_numeric_only_doc = """numeric_only : boolean, default False
87-
Include only float, int, boolean data
86+
_numeric_only_doc = """numeric_only : boolean, default None
87+
Include only float, int, boolean data. If None, will attempt to use
88+
everything, then use only numeric data
8889
"""
8990

9091
def _add_stat_doc(f, name, shortname, na_action=_doc_exclude_na,
@@ -2799,7 +2800,6 @@ def skew(self, axis=0, skipna=True, level=None):
27992800
_add_stat_doc(skew, 'unbiased skewness', 'skew')
28002801

28012802
def _reduce(self, op, axis=0, skipna=True, numeric_only=None):
2802-
28032803
f = lambda x: op(x, axis=axis, skipna=skipna, copy=True)
28042804
labels = self._get_agg_axis(axis)
28052805
if numeric_only is None:

pandas/core/reshape.py

+46-10
Original file line numberDiff line numberDiff line change
@@ -114,27 +114,32 @@ def _make_selectors(self):
114114
def get_result(self):
115115
# TODO: find a better way than this masking business
116116

117-
values, mask = self.get_new_values()
117+
values, value_mask = self.get_new_values()
118118
columns = self.get_new_columns()
119119
index = self.get_new_index()
120120

121121
# filter out missing levels
122122
if values.shape[1] > 0:
123+
mask = value_mask.sum(0) > 0
123124
values = values[:, mask]
124125
columns = columns[mask]
125126

126127
return DataFrame(values, index=index, columns=columns)
127128

128129
def get_new_values(self):
130+
return self._reshape_values(self.values)
131+
132+
def _reshape_values(self, values):
133+
values = self.values
129134
# place the values
130135
length, width = self.full_shape
131-
stride = self.values.shape[1]
136+
stride = values.shape[1]
132137
result_width = width * stride
133138

134-
new_values = np.empty((length, result_width), dtype=self.values.dtype)
139+
new_values = np.empty((length, result_width), dtype=values.dtype)
135140
new_mask = np.zeros((length, result_width), dtype=bool)
136141

137-
if issubclass(self.values.dtype.type, np.integer):
142+
if issubclass(values.dtype.type, np.integer):
138143
new_values = new_values.astype(float)
139144

140145
new_values.fill(np.nan)
@@ -148,7 +153,7 @@ def get_new_values(self):
148153
mask_chunk.flat[self.mask] = True
149154

150155
new_values = new_values.take(self.unique_groups, axis=0)
151-
return new_values, new_mask.sum(0) > 0
156+
return new_values, new_mask
152157

153158
def get_new_columns(self):
154159
if self.value_columns is None:
@@ -284,12 +289,43 @@ def _slow_pivot(index, columns, values):
284289

285290
def unstack(obj, level):
286291
if isinstance(obj, DataFrame):
287-
columns = obj.columns
292+
return _unstack_frame(obj, level)
293+
else:
294+
unstacker = _Unstacker(obj.values, obj.index, level=level)
295+
return unstacker.get_result()
296+
297+
def _unstack_frame(obj, level):
298+
from pandas.core.internals import BlockManager, make_block
299+
300+
if obj._is_mixed_type:
301+
unstacker = _Unstacker(np.empty(obj.shape, dtype=bool), # dummy
302+
obj.index, level=level,
303+
value_columns=obj.columns)
304+
new_columns = unstacker.get_new_columns()
305+
new_index = unstacker.get_new_index()
306+
new_axes = [new_columns, new_index]
307+
308+
new_blocks = []
309+
mask_blocks = []
310+
for blk in obj._data.blocks:
311+
bunstacker = _Unstacker(blk.values.T, obj.index, level=level,
312+
value_columns=blk.items)
313+
new_items = bunstacker.get_new_columns()
314+
new_values, mask = bunstacker.get_new_values()
315+
316+
mblk = make_block(mask.T, new_items, new_columns)
317+
mask_blocks.append(mblk)
318+
319+
newb = make_block(new_values.T, new_items, new_columns)
320+
new_blocks.append(newb)
321+
322+
result = DataFrame(BlockManager(new_blocks, new_axes))
323+
mask_frame = DataFrame(BlockManager(mask_blocks, new_axes))
324+
return result.ix[:, mask_frame.sum(0) > 0]
288325
else:
289-
columns = None
290-
unstacker = _Unstacker(obj.values, obj.index, level=level,
291-
value_columns=columns)
292-
return unstacker.get_result()
326+
unstacker = _Unstacker(obj.values, obj.index, level=level,
327+
value_columns=obj.columns)
328+
return unstacker.get_result()
293329

294330
def stack(frame, level=-1, dropna=True):
295331
"""

pandas/tests/test_multilevel.py

+10
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,16 @@ def test_partial_set(self):
679679
exp.ix[2000].values[:] = 5
680680
assert_frame_equal(df, exp)
681681

682+
def test_unstack_preserve_types(self):
683+
# GH #403
684+
self.ymd['E'] = 'foo'
685+
self.ymd['F'] = 2
686+
687+
unstacked = self.ymd.unstack('month')
688+
self.assert_(unstacked['A', 1].dtype == np.float64)
689+
self.assert_(unstacked['E', 1].dtype == np.object_)
690+
self.assert_(unstacked['F', 1].dtype == np.float64)
691+
682692
if __name__ == '__main__':
683693

684694
# unittest.main()

0 commit comments

Comments
 (0)