Skip to content

Commit c59b217

Browse files
committed
Merge pull request #7092 from sinhrks/appendtz
BUG: tz info lost by set_index and reindex
2 parents 2995737 + b716e67 commit c59b217

File tree

10 files changed

+184
-17
lines changed

10 files changed

+184
-17
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,8 @@ Bug Fixes
495495
- Bug in ``boxplot`` and ``hist`` draws unnecessary axes (:issue:`6769`)
496496
- Regression in ``groupby.nth()`` for out-of-bounds indexers (:issue:`6621`)
497497
- Bug in ``quantile`` with datetime values (:issue:`6965`)
498+
- Bug in ``DataFrame.set_index``, ``reindex`` and ``pivot`` where ``DatetimeIndex`` and ``PeriodIndex`` attributes were not preserved (:issue:`3950`, :issue:`5878`, :issue:`6631`)
499+
- Bug in ``MultiIndex.get_level_values`` where ``DatetimeIndex`` and ``PeriodIndex`` attributes were not preserved (:issue:`7092`)
498500

499501
pandas 0.13.1
500502
-------------

pandas/core/frame.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -2220,7 +2220,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
22202220
for i in range(self.index.nlevels):
22212221
arrays.append(self.index.get_level_values(i))
22222222
else:
2223-
arrays.append(np.asarray(self.index))
2223+
arrays.append(self.index)
22242224

22252225
to_remove = []
22262226
for col in keys:
@@ -2232,9 +2232,12 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
22322232

22332233
level = col.get_level_values(col.nlevels - 1)
22342234
names.extend(col.names)
2235-
elif isinstance(col, (Series, Index)):
2235+
elif isinstance(col, Series):
22362236
level = col.values
22372237
names.append(col.name)
2238+
elif isinstance(col, Index):
2239+
level = col
2240+
names.append(col.name)
22382241
elif isinstance(col, (list, np.ndarray)):
22392242
level = col
22402243
names.append(None)

pandas/core/index.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
114114

115115
# no class inference!
116116
if fastpath:
117-
subarr = data.view(cls)
118-
subarr.name = name
119-
return subarr
117+
return cls._simple_new(data, name)
120118

121119
from pandas.tseries.period import PeriodIndex
122120
if isinstance(data, (np.ndarray, ABCSeries)):
@@ -185,6 +183,12 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
185183
subarr._set_names([name])
186184
return subarr
187185

186+
@classmethod
187+
def _simple_new(cls, values, name, **kwargs):
188+
result = values.view(cls)
189+
result.name = name
190+
return result
191+
188192
def is_(self, other):
189193
"""
190194
More flexible, faster check like ``is`` but that works through views
@@ -2588,11 +2592,12 @@ def get_level_values(self, level):
25882592
values : ndarray
25892593
"""
25902594
num = self._get_level_number(level)
2591-
unique_vals = self.levels[num] # .values
2595+
unique = self.levels[num] # .values
25922596
labels = self.labels[num]
2593-
values = Index(com.take_1d(unique_vals.values, labels,
2594-
fill_value=unique_vals._na_value))
2595-
values.name = self.names[num]
2597+
filled = com.take_1d(unique.values, labels, fill_value=unique._na_value)
2598+
values = unique._simple_new(filled, self.names[num],
2599+
freq=getattr(unique, 'freq', None),
2600+
tz=getattr(unique, 'tz', None))
25962601
return values
25972602

25982603
def format(self, space=2, sparsify=None, adjoin=True, names=False,

pandas/core/reshape.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,10 @@ def __init__(self, values, index, level=-1, value_columns=None):
8282
labels = index.labels
8383

8484
def _make_index(lev, lab):
85-
if isinstance(lev, PeriodIndex):
86-
i = lev.copy()
87-
else:
88-
i = lev.__class__(_make_index_array_level(lev.values, lab))
89-
i.name = lev.name
85+
values = _make_index_array_level(lev.values, lab)
86+
i = lev._simple_new(values, lev.name,
87+
freq=getattr(lev, 'freq', None),
88+
tz=getattr(lev, 'tz', None))
9089
return i
9190

9291
self.new_index_levels = [_make_index(lev, lab)

pandas/tests/test_index.py

+13
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,19 @@ def test_index_ctor_infer_periodindex(self):
180180
assert_array_equal(rs, xp)
181181
tm.assert_isinstance(rs, PeriodIndex)
182182

183+
def test_constructor_simple_new(self):
184+
idx = Index([1, 2, 3, 4, 5], name='int')
185+
result = idx._simple_new(idx, 'int')
186+
self.assert_(result.equals(idx))
187+
188+
idx = Index([1.1, np.nan, 2.2, 3.0], name='float')
189+
result = idx._simple_new(idx, 'float')
190+
self.assert_(result.equals(idx))
191+
192+
idx = Index(['A', 'B', 'C', np.nan], name='obj')
193+
result = idx._simple_new(idx, 'obj')
194+
self.assert_(result.equals(idx))
195+
183196
def test_copy(self):
184197
i = Index([], name='Foo')
185198
i_copy = i.copy()

pandas/tests/test_multilevel.py

+72
Original file line numberDiff line numberDiff line change
@@ -1989,6 +1989,78 @@ def test_datetimeindex(self):
19891989
self.assert_(idx.levels[0].equals(expected1))
19901990
self.assert_(idx.levels[1].equals(idx2))
19911991

1992+
def test_set_index_datetime(self):
1993+
# GH 3950
1994+
df = pd.DataFrame({'label':['a', 'a', 'a', 'b', 'b', 'b'],
1995+
'datetime':['2011-07-19 07:00:00', '2011-07-19 08:00:00',
1996+
'2011-07-19 09:00:00', '2011-07-19 07:00:00',
1997+
'2011-07-19 08:00:00', '2011-07-19 09:00:00'],
1998+
'value':range(6)})
1999+
df.index = pd.to_datetime(df.pop('datetime'), utc=True)
2000+
df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific')
2001+
2002+
expected = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00'])
2003+
expected = expected.tz_localize('UTC').tz_convert('US/Pacific')
2004+
2005+
df = df.set_index('label', append=True)
2006+
self.assert_(df.index.levels[0].equals(expected))
2007+
self.assert_(df.index.levels[1].equals(pd.Index(['a', 'b'])))
2008+
2009+
df = df.swaplevel(0, 1)
2010+
self.assert_(df.index.levels[0].equals(pd.Index(['a', 'b'])))
2011+
self.assert_(df.index.levels[1].equals(expected))
2012+
2013+
2014+
df = DataFrame(np.random.random(6))
2015+
idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00',
2016+
'2011-07-19 09:00:00', '2011-07-19 07:00:00',
2017+
'2011-07-19 08:00:00', '2011-07-19 09:00:00'], tz='US/Eastern')
2018+
idx2 = pd.DatetimeIndex(['2012-04-01 09:00', '2012-04-01 09:00', '2012-04-01 09:00',
2019+
'2012-04-02 09:00', '2012-04-02 09:00', '2012-04-02 09:00'],
2020+
tz='US/Eastern')
2021+
idx3 = pd.date_range('2011-01-01 09:00', periods=6, tz='Asia/Tokyo')
2022+
2023+
df = df.set_index(idx1)
2024+
df = df.set_index(idx2, append=True)
2025+
df = df.set_index(idx3, append=True)
2026+
2027+
expected1 = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00',
2028+
'2011-07-19 09:00:00'], tz='US/Eastern')
2029+
expected2 = pd.DatetimeIndex(['2012-04-01 09:00', '2012-04-02 09:00'], tz='US/Eastern')
2030+
2031+
self.assert_(df.index.levels[0].equals(expected1))
2032+
self.assert_(df.index.levels[1].equals(expected2))
2033+
self.assert_(df.index.levels[2].equals(idx3))
2034+
2035+
# GH 7092
2036+
self.assert_(df.index.get_level_values(0).equals(idx1))
2037+
self.assert_(df.index.get_level_values(1).equals(idx2))
2038+
self.assert_(df.index.get_level_values(2).equals(idx3))
2039+
2040+
def test_set_index_period(self):
2041+
# GH 6631
2042+
df = DataFrame(np.random.random(6))
2043+
idx1 = pd.period_range('2011-01-01', periods=3, freq='M')
2044+
idx1 = idx1.append(idx1)
2045+
idx2 = pd.period_range('2013-01-01 09:00', periods=2, freq='H')
2046+
idx2 = idx2.append(idx2).append(idx2)
2047+
idx3 = pd.period_range('2005', periods=6, freq='Y')
2048+
2049+
df = df.set_index(idx1)
2050+
df = df.set_index(idx2, append=True)
2051+
df = df.set_index(idx3, append=True)
2052+
2053+
expected1 = pd.period_range('2011-01-01', periods=3, freq='M')
2054+
expected2 = pd.period_range('2013-01-01 09:00', periods=2, freq='H')
2055+
2056+
self.assert_(df.index.levels[0].equals(expected1))
2057+
self.assert_(df.index.levels[1].equals(expected2))
2058+
self.assert_(df.index.levels[2].equals(idx3))
2059+
2060+
self.assert_(df.index.get_level_values(0).equals(idx1))
2061+
self.assert_(df.index.get_level_values(1).equals(idx2))
2062+
self.assert_(df.index.get_level_values(2).equals(idx3))
2063+
19922064

19932065
if __name__ == '__main__':
19942066

pandas/tools/tests/test_pivot.py

+39-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44
from numpy.testing import assert_equal
55

6-
import pandas
6+
import pandas as pd
77
from pandas import DataFrame, Series, Index, MultiIndex, Grouper
88
from pandas.tools.merge import concat
99
from pandas.tools.pivot import pivot_table, crosstab
@@ -181,6 +181,42 @@ def test_pivot_index_with_nan(self):
181181
columns = Index(['C1','C2','C3','C4'],name='b'))
182182
tm.assert_frame_equal(result, expected)
183183

184+
def test_pivot_with_tz(self):
185+
# GH 5878
186+
df = DataFrame({'dt1': [datetime.datetime(2013, 1, 1, 9, 0),
187+
datetime.datetime(2013, 1, 2, 9, 0),
188+
datetime.datetime(2013, 1, 1, 9, 0),
189+
datetime.datetime(2013, 1, 2, 9, 0)],
190+
'dt2': [datetime.datetime(2014, 1, 1, 9, 0),
191+
datetime.datetime(2014, 1, 1, 9, 0),
192+
datetime.datetime(2014, 1, 2, 9, 0),
193+
datetime.datetime(2014, 1, 2, 9, 0)],
194+
'data1': range(4), 'data2': range(4)})
195+
196+
df['dt1'] = df['dt1'].apply(lambda d: pd.Timestamp(d, tz='US/Pacific'))
197+
df['dt2'] = df['dt2'].apply(lambda d: pd.Timestamp(d, tz='Asia/Tokyo'))
198+
199+
exp_col1 = Index(['data1', 'data1', 'data2', 'data2'])
200+
exp_col2 = pd.DatetimeIndex(['2014/01/01 09:00', '2014/01/02 09:00'] * 2,
201+
name='dt2', tz='Asia/Tokyo')
202+
exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2])
203+
expected = DataFrame([[0, 2, 0, 2], [1, 3, 1, 3]],
204+
index=pd.DatetimeIndex(['2013/01/01 09:00', '2013/01/02 09:00'],
205+
name='dt1', tz='US/Pacific'),
206+
columns=exp_col)
207+
208+
pv = df.pivot(index='dt1', columns='dt2')
209+
tm.assert_frame_equal(pv, expected)
210+
211+
expected = DataFrame([[0, 2], [1, 3]],
212+
index=pd.DatetimeIndex(['2013/01/01 09:00', '2013/01/02 09:00'],
213+
name='dt1', tz='US/Pacific'),
214+
columns=pd.DatetimeIndex(['2014/01/01 09:00', '2014/01/02 09:00'],
215+
name='dt2', tz='Asia/Tokyo'))
216+
217+
pv = df.pivot(index='dt1', columns='dt2', values='data1')
218+
tm.assert_frame_equal(pv, expected)
219+
184220
def test_margins(self):
185221
def _check_output(res, col, index=['A', 'B'], columns=['C']):
186222
cmarg = res['All'][:-1]
@@ -235,7 +271,7 @@ def test_pivot_integer_columns(self):
235271
d = datetime.date.min
236272
data = list(product(['foo', 'bar'], ['A', 'B', 'C'], ['x1', 'x2'],
237273
[d + datetime.timedelta(i) for i in range(20)], [1.0]))
238-
df = pandas.DataFrame(data)
274+
df = DataFrame(data)
239275
table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2])
240276

241277
df2 = df.rename(columns=str)
@@ -286,7 +322,7 @@ def test_pivot_columns_lexsorted(self):
286322
iproduct = np.random.randint(0, len(products), n)
287323
items['Index'] = products['Index'][iproduct]
288324
items['Symbol'] = products['Symbol'][iproduct]
289-
dr = pandas.date_range(datetime.date(2000, 1, 1), datetime.date(2010, 12, 31))
325+
dr = pd.date_range(datetime.date(2000, 1, 1), datetime.date(2010, 12, 31))
290326
dates = dr[np.random.randint(0, len(dr), n)]
291327
items['Year'] = dates.year
292328
items['Month'] = dates.month

pandas/tseries/period.py

+7
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,13 @@ def _from_arraylike(cls, data, freq, tz):
669669

670670
return data, freq
671671

672+
@classmethod
673+
def _simple_new(cls, values, name, freq=None, **kwargs):
674+
result = values.view(cls)
675+
result.name = name
676+
result.freq = freq
677+
return result
678+
672679
def __contains__(self, key):
673680
if not isinstance(key, Period) or key.freq != self.freq:
674681
if isinstance(key, compat.string_types):

pandas/tseries/tests/test_period.py

+19
Original file line numberDiff line numberDiff line change
@@ -1194,6 +1194,14 @@ def test_constructor_datetime64arr(self):
11941194

11951195
self.assertRaises(ValueError, PeriodIndex, vals, freq='D')
11961196

1197+
def test_constructor_simple_new(self):
1198+
idx = period_range('2007-01', name='p', periods=20, freq='M')
1199+
result = idx._simple_new(idx, 'p', freq=idx.freq)
1200+
self.assert_(result.equals(idx))
1201+
1202+
result = idx._simple_new(idx.astype('i8'), 'p', freq=idx.freq)
1203+
self.assert_(result.equals(idx))
1204+
11971205
def test_is_(self):
11981206
create_index = lambda: PeriodIndex(freq='A', start='1/1/2001',
11991207
end='12/1/2009')
@@ -1390,6 +1398,17 @@ def test_frame_setitem(self):
13901398
tm.assert_isinstance(rs.index, PeriodIndex)
13911399
self.assert_(rs.index.equals(rng))
13921400

1401+
def test_period_set_index_reindex(self):
1402+
# GH 6631
1403+
df = DataFrame(np.random.random(6))
1404+
idx1 = period_range('2011/01/01', periods=6, freq='M')
1405+
idx2 = period_range('2013', periods=6, freq='A')
1406+
1407+
df = df.set_index(idx1)
1408+
self.assert_(df.index.equals(idx1))
1409+
df = df.reindex(idx2)
1410+
self.assert_(df.index.equals(idx2))
1411+
13931412
def test_nested_dict_frame_constructor(self):
13941413
rng = period_range('1/1/2000', periods=5)
13951414
df = DataFrame(randn(10, 5), columns=rng)

pandas/tseries/tests/test_timeseries.py

+11
Original file line numberDiff line numberDiff line change
@@ -2510,6 +2510,17 @@ def test_dti_reset_index_round_trip(self):
25102510
self.assertEquals(df.index[0], stamp)
25112511
self.assertEquals(df.reset_index()['Date'][0], stamp)
25122512

2513+
def test_dti_set_index_reindex(self):
2514+
# GH 6631
2515+
df = DataFrame(np.random.random(6))
2516+
idx1 = date_range('2011/01/01', periods=6, freq='M', tz='US/Eastern')
2517+
idx2 = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo')
2518+
2519+
df = df.set_index(idx1)
2520+
self.assert_(df.index.equals(idx1))
2521+
df = df.reindex(idx2)
2522+
self.assert_(df.index.equals(idx2))
2523+
25132524
def test_datetimeindex_union_join_empty(self):
25142525
dti = DatetimeIndex(start='1/1/2001', end='2/1/2001', freq='D')
25152526
empty = Index([])

0 commit comments

Comments
 (0)