Skip to content

Commit ce2a48c

Browse files
MridulSjreback
authored and committed
BUG: add _tz_compare() to handle comparisons of pytz tzs correctly
closes pandas-dev#18523
1 parent 04a6815 commit ce2a48c

File tree

4 files changed

+92
-2
lines changed

4 files changed

+92
-2
lines changed

pandas/core/indexes/datetimes.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -1181,6 +1181,28 @@ def join(self, other, how='left', level=None, return_indexers=False,
11811181
return Index.join(this, other, how=how, level=level,
11821182
return_indexers=return_indexers, sort=sort)
11831183

1184+
def _tz_compare(self, other):
1185+
"""
1186+
Compare string representations of timezones of two DatetimeIndex as
1187+
directly comparing equality is broken. The same timezone can be
1188+
represented as different instances of timezones. For example
1189+
`<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>` and
1190+
`<DstTzInfo 'Europe/Paris' CET+1:00:00 STD>` are essentially the same
1191+
timezone but do not compare as equal, whereas the string representation
1192+
for both of these is `'Europe/Paris'`.
1193+
1194+
Parameters
1195+
----------
1196+
other : DatetimeIndex
1197+
1198+
Returns
1199+
-------
1200+
compare : bool
1201+
1202+
"""
1203+
# GH 18523: compare str() of each tzinfo rather than the tzinfo objects
1204+
return str(self.tzinfo) == str(other.tzinfo)
1205+
11841206
def _maybe_utc_convert(self, other):
11851207
this = self
11861208
if isinstance(other, DatetimeIndex):
@@ -1192,7 +1214,7 @@ def _maybe_utc_convert(self, other):
11921214
raise TypeError('Cannot join tz-naive with tz-aware '
11931215
'DatetimeIndex')
11941216

1195-
if self.tz != other.tz:
1217+
if not self._tz_compare(other):
11961218
this = self.tz_convert('UTC')
11971219
other = other.tz_convert('UTC')
11981220
return this, other
@@ -1296,7 +1318,7 @@ def __iter__(self):
12961318

12971319
def _wrap_union_result(self, other, result):
12981320
name = self.name if self.name == other.name else None
1299-
if self.tz != other.tz:
1321+
if not self._tz_compare(other):
13001322
raise ValueError('Passed item and index have different timezone')
13011323
return self._simple_new(result, name=name, freq=None, tz=self.tz)
13021324

pandas/tests/indexes/datetimes/test_construction.py

+20
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,26 @@ def test_000constructor_resolution(self):
443443

444444
assert idx.nanosecond[0] == t1.nanosecond
445445

446+
def test_concat(self):  # added with the fix for GH 18523
447+
idx1 = pd.date_range('2011-01-01', periods=3, freq='H',
448+
tz='Europe/Paris')
449+
idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq='H')  # built from idx1's endpoints
450+
df1 = pd.DataFrame({'a': [1, 2, 3]}, index=idx1)
451+
df2 = pd.DataFrame({'b': [1, 2, 3]}, index=idx2)
452+
res = pd.concat([df1, df2], axis=1)
453+
454+
assert str(res.index.tzinfo) == str(df1.index.tzinfo)  # result tz prints like both inputs' tz
455+
assert str(res.index.tzinfo) == str(df2.index.tzinfo)
456+
457+
idx3 = pd.date_range('2011-01-01', periods=3,
458+
freq='H', tz='Asia/Tokyo')
459+
df3 = pd.DataFrame({'b': [1, 2, 3]}, index=idx3)
460+
res = pd.concat([df1, df3], axis=1)
461+
462+
assert str(res.index.tzinfo) == 'UTC'  # differing zones: result is expected in UTC
463+
assert str(res.index.tzinfo) != str(df1.index.tzinfo)
464+
assert str(res.index.tzinfo) != str(df3.index.tzinfo)
465+
446466

447467
class TestTimeSeries(object):
448468

pandas/tests/indexes/datetimes/test_tools.py

+9
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,15 @@ def test_to_datetime_list_of_integers(self):
918918

919919
tm.assert_index_equal(rng, result)
920920

921+
<<<<<<< HEAD
922+
=======
923+
def test_to_datetime_freq(self):
924+
xp = bdate_range('2000-1-1', periods=10, tz='UTC')
925+
rs = xp.to_datetime()
926+
assert xp.freq == rs.freq
927+
assert xp._tz_compare(rs)
928+
929+
>>>>>>> PR_TOOL_MERGE_PR_18596
921930
def test_to_datetime_overflow(self):
922931
# gh-17637
923932
# we are overflowing Timedelta range here

pandas/tests/reshape/test_concat.py

+39
Original file line numberDiff line numberDiff line change
@@ -2074,6 +2074,45 @@ def test_concat_order(self):
20742074
expected = expected.sort_values()
20752075
tm.assert_index_equal(result, expected)
20762076

2077+
def test_concat_datetime_timezone(self):
2078+
# GH 18523
2079+
idx1 = pd.date_range('2011-01-01', periods=3, freq='H',
2080+
tz='Europe/Paris')
2081+
idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq='H')  # built from idx1's endpoints
2082+
df1 = pd.DataFrame({'a': [1, 2, 3]}, index=idx1)
2083+
df2 = pd.DataFrame({'b': [1, 2, 3]}, index=idx2)
2084+
res = pd.concat([df1, df2], axis=1)
2085+
2086+
exp_idx = DatetimeIndex(['2011-01-01 00:00:00+01:00',
2087+
'2011-01-01 01:00:00+01:00',
2088+
'2011-01-01 02:00:00+01:00'],
2089+
freq='H'
2090+
).tz_localize('UTC').tz_convert('Europe/Paris')
2091+
2092+
exp = pd.DataFrame([[1, 1], [2, 2], [3, 3]],
2093+
index=exp_idx, columns=['a', 'b'])
2094+
2095+
tm.assert_frame_equal(res, exp)  # expected index is in Europe/Paris, rows aligned one-to-one
2096+
2097+
idx3 = pd.date_range('2011-01-01', periods=3,
2098+
freq='H', tz='Asia/Tokyo')
2099+
df3 = pd.DataFrame({'b': [1, 2, 3]}, index=idx3)
2100+
res = pd.concat([df1, df3], axis=1)
2101+
2102+
exp_idx = DatetimeIndex(['2010-12-31 15:00:00+00:00',
2103+
'2010-12-31 16:00:00+00:00',
2104+
'2010-12-31 17:00:00+00:00',
2105+
'2010-12-31 23:00:00+00:00',
2106+
'2011-01-01 00:00:00+00:00',
2107+
'2011-01-01 01:00:00+00:00']
2108+
).tz_localize('UTC')
2109+
2110+
exp = pd.DataFrame([[np.nan, 1], [np.nan, 2], [np.nan, 3],
2111+
[1, np.nan], [2, np.nan], [3, np.nan]],
2112+
index=exp_idx, columns=['a', 'b'])
2113+
2114+
tm.assert_frame_equal(res, exp)  # expected index is in UTC; no shared timestamps, so NaN-filled
2115+
20772116

20782117
@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
20792118
@pytest.mark.parametrize('dt', np.sctypes['float'])

0 commit comments

Comments
 (0)