Skip to content

Commit cc452b2

Browse files
committed
Merge pull request #7533 from sinhrks/resetindex
BUG: df.reset_index loses tz
2 parents 3971223 + 530e87c commit cc452b2

File tree

4 files changed

+75
-22
lines changed

4 files changed

+75
-22
lines changed

doc/source/v0.14.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ Bug Fixes
250250

251251
- Bug in ``Index.astype(float)`` where it would return an ``object`` dtype
252252
``Index`` (:issue:`7464`).
253-
253+
- Bug in ``DataFrame.reset_index`` where ``tz`` was lost (:issue:`3950`)
254254

255255

256256
- Bug in non-monotonic ``Index.union`` where ``name`` may be preserved incorrectly (:issue:`7458`)

pandas/core/frame.py

+20-21
Original file line numberDiff line numberDiff line change
@@ -2326,19 +2326,24 @@ def reset_index(self, level=None, drop=False, inplace=False, col_level=0,
23262326
else:
23272327
new_obj = self.copy()
23282328

2329-
def _maybe_cast(values, labels=None):
2330-
2331-
if values.dtype == np.object_:
2332-
values = lib.maybe_convert_objects(values)
2333-
2334-
# if we have the labels, extract the values with a mask
2335-
if labels is not None:
2336-
mask = labels == -1
2337-
values = values.take(labels)
2338-
if mask.any():
2339-
values, changed = com._maybe_upcast_putmask(
2340-
values, mask, np.nan)
2341-
2329+
def _maybe_casted_values(index, labels=None):
2330+
if isinstance(index, PeriodIndex):
2331+
values = index.asobject
2332+
elif (isinstance(index, DatetimeIndex) and
2333+
index.tz is not None):
2334+
values = index.asobject
2335+
else:
2336+
values = index.values
2337+
if values.dtype == np.object_:
2338+
values = lib.maybe_convert_objects(values)
2339+
2340+
# if we have the labels, extract the values with a mask
2341+
if labels is not None:
2342+
mask = labels == -1
2343+
values = values.take(labels)
2344+
if mask.any():
2345+
values, changed = com._maybe_upcast_putmask(values,
2346+
mask, np.nan)
23422347
return values
23432348

23442349
new_index = np.arange(len(new_obj))
@@ -2371,7 +2376,7 @@ def _maybe_cast(values, labels=None):
23712376
col_name = tuple(name_lst)
23722377

23732378
# to ndarray and maybe infer different dtype
2374-
level_values = _maybe_cast(lev.values, lab)
2379+
level_values = _maybe_casted_values(lev, lab)
23752380
if level is None or i in level:
23762381
new_obj.insert(0, col_name, level_values)
23772382

@@ -2387,13 +2392,7 @@ def _maybe_cast(values, labels=None):
23872392
lev_num = self.columns._get_level_number(col_level)
23882393
name_lst[lev_num] = name
23892394
name = tuple(name_lst)
2390-
if isinstance(self.index, PeriodIndex):
2391-
values = self.index.asobject
2392-
elif (isinstance(self.index, DatetimeIndex) and
2393-
self.index.tz is not None):
2394-
values = self.index.asobject
2395-
else:
2396-
values = _maybe_cast(self.index.values)
2395+
values = _maybe_casted_values(self.index)
23972396
new_obj.insert(0, name, values)
23982397

23992398
new_obj.index = new_index

pandas/tests/test_frame.py

+14
Original file line numberDiff line numberDiff line change
@@ -2182,6 +2182,20 @@ def test_set_index_cast_datetimeindex(self):
21822182
df.pop('ts')
21832183
assert_frame_equal(df, expected)
21842184

2185+
# GH 3950
2186+
# reset_index with single level
2187+
for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern']:
2188+
idx = pd.date_range('1/1/2011', periods=5, freq='D', tz=tz, name='idx')
2189+
df = pd.DataFrame({'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx)
2190+
2191+
expected = pd.DataFrame({'idx': [datetime(2011, 1, 1), datetime(2011, 1, 2),
2192+
datetime(2011, 1, 3), datetime(2011, 1, 4),
2193+
datetime(2011, 1, 5)],
2194+
'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']},
2195+
columns=['idx', 'a', 'b'])
2196+
expected['idx'] = expected['idx'].apply(lambda d: pd.Timestamp(d, tz=tz))
2197+
assert_frame_equal(df.reset_index(), expected)
2198+
21852199
def test_set_index_multiindexcolumns(self):
21862200
columns = MultiIndex.from_tuples([('foo', 1), ('foo', 2), ('bar', 1)])
21872201
df = DataFrame(np.random.randn(3, 3), columns=columns)

pandas/tests/test_multilevel.py

+40
Original file line numberDiff line numberDiff line change
@@ -2078,6 +2078,46 @@ def test_set_index_datetime(self):
20782078
self.assertTrue(df.index.get_level_values(1).equals(idx2))
20792079
self.assertTrue(df.index.get_level_values(2).equals(idx3))
20802080

2081+
def test_reset_index_datetime(self):
2082+
# GH 3950
2083+
for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern']:
2084+
idx1 = pd.date_range('1/1/2011', periods=5, freq='D', tz=tz, name='idx1')
2085+
idx2 = pd.Index(range(5), name='idx2')
2086+
idx = pd.MultiIndex.from_arrays([idx1, idx2])
2087+
df = pd.DataFrame({'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx)
2088+
2089+
expected = pd.DataFrame({'idx1': [datetime.datetime(2011, 1, 1),
2090+
datetime.datetime(2011, 1, 2),
2091+
datetime.datetime(2011, 1, 3),
2092+
datetime.datetime(2011, 1, 4),
2093+
datetime.datetime(2011, 1, 5)],
2094+
'idx2': range(5),
2095+
'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']},
2096+
columns=['idx1', 'idx2', 'a', 'b'])
2097+
expected['idx1'] = expected['idx1'].apply(lambda d: pd.Timestamp(d, tz=tz))
2098+
assert_frame_equal(df.reset_index(), expected)
2099+
2100+
idx3 = pd.date_range('1/1/2012', periods=5, freq='MS', tz='Europe/Paris', name='idx3')
2101+
idx = pd.MultiIndex.from_arrays([idx1, idx2, idx3])
2102+
df = pd.DataFrame({'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx)
2103+
2104+
expected = pd.DataFrame({'idx1': [datetime.datetime(2011, 1, 1),
2105+
datetime.datetime(2011, 1, 2),
2106+
datetime.datetime(2011, 1, 3),
2107+
datetime.datetime(2011, 1, 4),
2108+
datetime.datetime(2011, 1, 5)],
2109+
'idx2': range(5),
2110+
'idx3': [datetime.datetime(2012, 1, 1),
2111+
datetime.datetime(2012, 2, 1),
2112+
datetime.datetime(2012, 3, 1),
2113+
datetime.datetime(2012, 4, 1),
2114+
datetime.datetime(2012, 5, 1)],
2115+
'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']},
2116+
columns=['idx1', 'idx2', 'idx3', 'a', 'b'])
2117+
expected['idx1'] = expected['idx1'].apply(lambda d: pd.Timestamp(d, tz=tz))
2118+
expected['idx3'] = expected['idx3'].apply(lambda d: pd.Timestamp(d, tz='Europe/Paris'))
2119+
assert_frame_equal(df.reset_index(), expected)
2120+
20812121
def test_set_index_period(self):
20822122
# GH 6631
20832123
df = DataFrame(np.random.random(6))

0 commit comments

Comments
 (0)