From 625ee94a31e4011b677785f12dfd204591cadfcf Mon Sep 17 00:00:00 2001 From: jreback Date: Mon, 13 May 2013 09:28:32 -0400 Subject: [PATCH] BUG: Fixed bug in reset_index with ``NaN`` in a multi-index (GH3586_) --- RELEASE.rst | 2 ++ pandas/core/frame.py | 19 ++++++++++++++----- pandas/tests/test_indexing.py | 19 +++++++++++++++++++ 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index efd6b87e59c62..eaff573a7510a 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -103,6 +103,7 @@ pandas 0.11.1 - Fix ``.diff`` on datelike and timedelta operations (GH3100_) - ``combine_first`` not returning the same dtype in cases where it can (GH3552_) - Fixed bug with ``Panel.transpose`` argument aliases (GH3556_) + - Fixed bug in reset_index with ``NaN`` in a multi-index (GH3586_) .. _GH3164: https://github.com/pydata/pandas/issues/3164 .. _GH2786: https://github.com/pydata/pandas/issues/2786 @@ -140,6 +141,7 @@ pandas 0.11.1 .. _GH3492: https://github.com/pydata/pandas/issues/3492 .. _GH3552: https://github.com/pydata/pandas/issues/3552 .. _GH3562: https://github.com/pydata/pandas/issues/3562 +.. _GH3586: https://github.com/pydata/pandas/issues/3586 .. _GH3493: https://github.com/pydata/pandas/issues/3493 .. _GH3556: https://github.com/pydata/pandas/issues/3556 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ad1429fcea1ca..725d10c2270d3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2808,9 +2808,18 @@ def reset_index(self, level=None, drop=False, inplace=False, col_level=0, else: new_obj = self.copy() - def _maybe_cast(values): + def _maybe_cast(values, labels=None): + if values.dtype == np.object_: values = lib.maybe_convert_objects(values) + + # if we have the labels, extract the values with a mask + if labels is not None: + mask = labels == -1 + values = values.take(labels) + if mask.any(): + values, changed = com._maybe_upcast_putmask(values,mask,np.nan) + return values new_index = np.arange(len(new_obj)) @@ -2843,9 +2852,9 @@ def _maybe_cast(values): col_name = tuple(name_lst) # to ndarray and maybe infer different dtype - level_values = _maybe_cast(lev.values) + level_values = _maybe_cast(lev.values, lab) if level is None or i in level: - new_obj.insert(0, col_name, level_values.take(lab)) + new_obj.insert(0, col_name, level_values) elif not drop: name = self.index.name @@ -2865,8 +2874,8 @@ def _maybe_cast(values): self.index.tz is not None): values = self.index.asobject else: - values = self.index.values - new_obj.insert(0, name, _maybe_cast(values)) + values = _maybe_cast(self.index.values) + new_obj.insert(0, name, values) new_obj.index = new_index if not inplace: diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index f70c781847cc7..01651f2674a90 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -799,6 +799,25 @@ def test_indexing_mixed_frame_bug(self): self.assert_(df.iloc[0,2] == '-----') #if I look at df, then element [0,2] equals '_'. If instead I type df.ix[idx,'test'], I get '-----', finally by typing df.iloc[0,2] I get '_'. + + + def test_set_index_nan(self): + + # GH 3586 + df = DataFrame({'PRuid': {17: 'nonQC', 18: 'nonQC', 19: 'nonQC', 20: '10', 21: '11', 22: '12', 23: '13', + 24: '24', 25: '35', 26: '46', 27: '47', 28: '48', 29: '59', 30: '10'}, + 'QC': {17: 0.0, 18: 0.0, 19: 0.0, 20: nan, 21: nan, 22: nan, 23: nan, 24: 1.0, 25: nan, + 26: nan, 27: nan, 28: nan, 29: nan, 30: nan}, + 'data': {17: 7.9544899999999998, 18: 8.0142609999999994, 19: 7.8591520000000008, 20: 0.86140349999999999, + 21: 0.87853110000000001, 22: 0.8427041999999999, 23: 0.78587700000000005, 24: 0.73062459999999996, + 25: 0.81668560000000001, 26: 0.81927080000000008, 27: 0.80705009999999999, 28: 0.81440240000000008, + 29: 0.80140849999999997, 30: 0.81307740000000006}, + 'year': {17: 2006, 18: 2007, 19: 2008, 20: 1985, 21: 1985, 22: 1985, 23: 1985, + 24: 1985, 25: 1985, 26: 1985, 27: 1985, 28: 1985, 29: 1985, 30: 1986}}).reset_index() + + result = df.set_index(['year','PRuid','QC']).reset_index().reindex(columns=df.columns) + assert_frame_equal(result,df) + if __name__ == '__main__': import nose