Skip to content

Commit 1fe657a

Browse files
committed
Merge pull request #3040 from jreback/unstack
BUG: Unstack of a frame with no nans would always cause dtype upcasting (GH #2929)
2 parents fc2d3cb + 32dbd95 commit 1fe657a

File tree

4 files changed

+50
-5
lines changed

4 files changed

+50
-5
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ pandas 0.11.0
148148
- Bug in DataFrame update where non-specified values could cause dtype changes (GH3016_)
149149
- Formatting of an index that has ``nan`` was inconsistent or wrong (would fill from
150150
other values) (GH2850_)
151+
- Unstack of a frame with no nans would always cause dtype upcasting (GH2929_)
151152

152153
.. _GH622: https://github.com/pydata/pandas/issues/622
153154
.. _GH797: https://github.com/pydata/pandas/issues/797
@@ -169,6 +170,7 @@ pandas 0.11.0
169170
.. _GH2892: https://github.com/pydata/pandas/issues/2892
170171
.. _GH2909: https://github.com/pydata/pandas/issues/2909
171172
.. _GH2922: https://github.com/pydata/pandas/issues/2922
173+
.. _GH2929: https://github.com/pydata/pandas/issues/2929
172174
.. _GH2931: https://github.com/pydata/pandas/issues/2931
173175
.. _GH2973: https://github.com/pydata/pandas/issues/2973
174176
.. _GH2967: https://github.com/pydata/pandas/issues/2967

pandas/core/reshape.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -144,15 +144,23 @@ def get_result(self):
144144

145145
def get_new_values(self):
146146
values = self.values
147+
147148
# place the values
148149
length, width = self.full_shape
149150
stride = values.shape[1]
150151
result_width = width * stride
152+
result_shape = (length, result_width)
151153

152-
dtype, fill_value = _maybe_promote(values.dtype)
153-
new_values = np.empty((length, result_width), dtype=dtype)
154-
new_values.fill(fill_value)
155-
new_mask = np.zeros((length, result_width), dtype=bool)
154+
# if our mask is all True, then we can use our existing dtype
155+
if self.mask.all():
156+
dtype = values.dtype
157+
new_values = np.empty(result_shape, dtype=dtype)
158+
else:
159+
dtype, fill_value = _maybe_promote(values.dtype)
160+
new_values = np.empty(result_shape, dtype=dtype)
161+
new_values.fill(fill_value)
162+
163+
new_mask = np.zeros(result_shape, dtype=bool)
156164

157165
# is there a simpler / faster way of doing this?
158166
for i in xrange(values.shape[1]):

pandas/tests/test_frame.py

+35
Original file line numberDiff line numberDiff line change
@@ -8242,6 +8242,41 @@ def test_unstack_to_series(self):
82428242
data = data.unstack()
82438243
assert_frame_equal(old_data, data)
82448244

8245+
def test_unstack_dtypes(self):
8246+
8247+
# GH 2929
8248+
rows = [[1, 1, 3, 4],
8249+
[1, 2, 3, 4],
8250+
[2, 1, 3, 4],
8251+
[2, 2, 3, 4]]
8252+
8253+
df = DataFrame(rows, columns=list('ABCD'))
8254+
result = df.get_dtype_counts()
8255+
expected = Series({'int64' : 4})
8256+
assert_series_equal(result, expected)
8257+
8258+
# single dtype
8259+
df2 = df.set_index(['A','B'])
8260+
df3 = df2.unstack('B')
8261+
result = df3.get_dtype_counts()
8262+
expected = Series({'int64' : 4})
8263+
assert_series_equal(result, expected)
8264+
8265+
# mixed
8266+
df2 = df.set_index(['A','B'])
8267+
df2['C'] = 3.
8268+
df3 = df2.unstack('B')
8269+
result = df3.get_dtype_counts()
8270+
expected = Series({'int64' : 2, 'float64' : 2})
8271+
assert_series_equal(result, expected)
8272+
8273+
df2['D'] = 'foo'
8274+
df3 = df2.unstack('B')
8275+
result = df3.get_dtype_counts()
8276+
expected = Series({'float64' : 2, 'object' : 2})
8277+
assert_series_equal(result, expected)
8278+
8279+
82458280
def test_reset_index(self):
82468281
stacked = self.frame.stack()[::2]
82478282
stacked = DataFrame({'foo': stacked, 'bar': stacked})

pandas/tests/test_multilevel.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1346,7 +1346,7 @@ def test_unstack_group_index_overflow(self):
13461346

13471347
# test roundtrip
13481348
stacked = result.stack()
1349-
assert_series_equal(s.astype(np.float64),
1349+
assert_series_equal(s,
13501350
stacked.reindex(s.index))
13511351

13521352
# put it at beginning

0 commit comments

Comments
 (0)