diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index dacd433f112a5..2caffab2edc21 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -297,6 +297,7 @@ Timezones - Bug in :func:`DataFrame.update` when updating with timezone aware data would return timezone naive data (:issue:`25807`) - Bug in :func:`to_datetime` where an uninformative ``RuntimeError`` was raised when passing a naive :class:`Timestamp` with datetime strings with mixed UTC offsets (:issue:`25978`) - Bug in :func:`to_datetime` with ``unit='ns'`` would drop timezone information from the parsed argument (:issue:`26168`) +- Bug in :func:`DataFrame.join` where joining a timezone aware index with a timezone aware column would result in a column of ``NaN`` (:issue:`26335`) Numeric ^^^^^^^ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 39b955ea7cbe1..c21af1c7e820d 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1674,8 +1674,8 @@ def _right_outer_join(x, y, max_groups): def _factorize_keys(lk, rk, sort=True): # Some pre-processing for non-ndarray lk / rk if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): - lk = lk._data - rk = rk._data + lk = getattr(lk, '_values', lk)._data + rk = getattr(rk, '_values', rk)._data elif (is_categorical_dtype(lk) and is_categorical_dtype(rk) and diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 6703a0e3355d1..df10950c693a6 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -679,7 +679,7 @@ def test_join_multi_to_multi(self, join_type): right.join(left, on=['abc', 'xy'], how=join_type) def test_join_on_tz_aware_datetimeindex(self): - # GH 23931 + # GH 23931, 26335 df1 = pd.DataFrame( { 'date': pd.date_range(start='2018-01-01', periods=5, @@ -697,7 +697,8 @@ def test_join_on_tz_aware_datetimeindex(self): ) result = df1.join(df2.set_index('date'), on='date') expected = df1.copy() - expected['vals_2'] = pd.Series([np.nan] * len(expected), dtype=object) + expected['vals_2'] = pd.Series([np.nan] * 2 + list('tuv'), + dtype=object) assert_frame_equal(result, expected)