diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index e8c7a6f9ab462..bde685e0e6167 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -252,7 +252,7 @@ Bug Fixes - Bug in ``Index.astype(float)`` where it would return an ``object`` dtype ``Index`` (:issue:`7464`). - +- Bug in ``DataFrame.reset_index`` where ``tz`` was lost (:issue:`3950`) - Bug in non-monotonic ``Index.union`` may preserve ``name`` incorrectly (:issue:`7458`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9645d09a5fd0d..d770f8c8f853a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2326,19 +2326,24 @@ def reset_index(self, level=None, drop=False, inplace=False, col_level=0, else: new_obj = self.copy() - def _maybe_cast(values, labels=None): - - if values.dtype == np.object_: - values = lib.maybe_convert_objects(values) - - # if we have the labels, extract the values with a mask - if labels is not None: - mask = labels == -1 - values = values.take(labels) - if mask.any(): - values, changed = com._maybe_upcast_putmask( - values, mask, np.nan) - + def _maybe_casted_values(index, labels=None): + if isinstance(index, PeriodIndex): + values = index.asobject + elif (isinstance(index, DatetimeIndex) and + index.tz is not None): + values = index.asobject + else: + values = index.values + if values.dtype == np.object_: + values = lib.maybe_convert_objects(values) + + # if we have the labels, extract the values with a mask + if labels is not None: + mask = labels == -1 + values = values.take(labels) + if mask.any(): + values, changed = com._maybe_upcast_putmask(values, + mask, np.nan) return values new_index = np.arange(len(new_obj)) @@ -2371,7 +2376,7 @@ def _maybe_cast(values, labels=None): col_name = tuple(name_lst) # to ndarray and maybe infer different dtype - level_values = _maybe_cast(lev.values, lab) + level_values = _maybe_casted_values(lev, lab) if level is None or i in level: new_obj.insert(0, col_name, level_values) @@ -2387,13 +2392,7 @@ def 
_maybe_cast(values, labels=None): lev_num = self.columns._get_level_number(col_level) name_lst[lev_num] = name name = tuple(name_lst) - if isinstance(self.index, PeriodIndex): - values = self.index.asobject - elif (isinstance(self.index, DatetimeIndex) and - self.index.tz is not None): - values = self.index.asobject - else: - values = _maybe_cast(self.index.values) + values = _maybe_casted_values(self.index) new_obj.insert(0, name, values) new_obj.index = new_index diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 6848b130dee3a..ed2b19f5f2e19 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2182,6 +2182,20 @@ def test_set_index_cast_datetimeindex(self): df.pop('ts') assert_frame_equal(df, expected) + # GH 3950 + # reset_index with single level + for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern']: + idx = pd.date_range('1/1/2011', periods=5, freq='D', tz=tz, name='idx') + df = pd.DataFrame({'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) + + expected = pd.DataFrame({'idx': [datetime(2011, 1, 1), datetime(2011, 1, 2), + datetime(2011, 1, 3), datetime(2011, 1, 4), + datetime(2011, 1, 5)], + 'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, + columns=['idx', 'a', 'b']) + expected['idx'] = expected['idx'].apply(lambda d: pd.Timestamp(d, tz=tz)) + assert_frame_equal(df.reset_index(), expected) + def test_set_index_multiindexcolumns(self): columns = MultiIndex.from_tuples([('foo', 1), ('foo', 2), ('bar', 1)]) df = DataFrame(np.random.randn(3, 3), columns=columns) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index b8ccfb3eb151b..54544a87e4038 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2078,6 +2078,46 @@ def test_set_index_datetime(self): self.assertTrue(df.index.get_level_values(1).equals(idx2)) self.assertTrue(df.index.get_level_values(2).equals(idx3)) + def test_reset_index_datetime(self): + # GH 3950 + for tz in ['UTC', 'Asia/Tokyo', 
'US/Eastern']: + idx1 = pd.date_range('1/1/2011', periods=5, freq='D', tz=tz, name='idx1') + idx2 = pd.Index(range(5), name='idx2') + idx = pd.MultiIndex.from_arrays([idx1, idx2]) + df = pd.DataFrame({'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) + + expected = pd.DataFrame({'idx1': [datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5)], + 'idx2': range(5), + 'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, + columns=['idx1', 'idx2', 'a', 'b']) + expected['idx1'] = expected['idx1'].apply(lambda d: pd.Timestamp(d, tz=tz)) + assert_frame_equal(df.reset_index(), expected) + + idx3 = pd.date_range('1/1/2012', periods=5, freq='MS', tz='Europe/Paris', name='idx3') + idx = pd.MultiIndex.from_arrays([idx1, idx2, idx3]) + df = pd.DataFrame({'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) + + expected = pd.DataFrame({'idx1': [datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5)], + 'idx2': range(5), + 'idx3': [datetime.datetime(2012, 1, 1), + datetime.datetime(2012, 2, 1), + datetime.datetime(2012, 3, 1), + datetime.datetime(2012, 4, 1), + datetime.datetime(2012, 5, 1)], + 'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, + columns=['idx1', 'idx2', 'idx3', 'a', 'b']) + expected['idx1'] = expected['idx1'].apply(lambda d: pd.Timestamp(d, tz=tz)) + expected['idx3'] = expected['idx3'].apply(lambda d: pd.Timestamp(d, tz='Europe/Paris')) + assert_frame_equal(df.reset_index(), expected) + def test_set_index_period(self): # GH 6631 df = DataFrame(np.random.random(6))