From 66e16aa572178d6cc8590e3fcfe3326e64ee33a0 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 12 May 2019 22:55:50 -0700 Subject: [PATCH 1/2] BUG: DataFrame.join on tz aware index and column --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/reshape/merge.py | 14 ++++++++++++-- pandas/tests/reshape/merge/test_join.py | 5 +++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index dacd433f112a5..2caffab2edc21 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -297,6 +297,7 @@ Timezones - Bug in :func:`DataFrame.update` when updating with timezone aware data would return timezone naive data (:issue:`25807`) - Bug in :func:`to_datetime` where an uninformative ``RuntimeError`` was raised when passing a naive :class:`Timestamp` with datetime strings with mixed UTC offsets (:issue:`25978`) - Bug in :func:`to_datetime` with ``unit='ns'`` would drop timezone information from the parsed argument (:issue:`26168`) +- Bug in :func:`DataFrame.join` where joining a timezone aware index with a timezone aware column would result in a column of ``NaN`` (:issue:`26335`) Numeric ^^^^^^^ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 39b955ea7cbe1..a3a60c08691f3 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1671,11 +1671,21 @@ def _right_outer_join(x, y, max_groups): } +def _convert_array_or_index(arg): + """Converts DatetimeArray or DatetimeIndex to numpy array in UTC""" + try: + # DatetimeIndex case + return arg._values._data + except AttributeError: + # DatetimeArray Case + return arg._data + + def _factorize_keys(lk, rk, sort=True): # Some pre-processing for non-ndarray lk / rk if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): - lk = lk._data - rk = rk._data + lk = _convert_array_or_index(lk) + rk = _convert_array_or_index(rk) elif (is_categorical_dtype(lk) and is_categorical_dtype(rk) and diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 6703a0e3355d1..df10950c693a6 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -679,7 +679,7 @@ def test_join_multi_to_multi(self, join_type): right.join(left, on=['abc', 'xy'], how=join_type) def test_join_on_tz_aware_datetimeindex(self): - # GH 23931 + # GH 23931, 26335 df1 = pd.DataFrame( { 'date': pd.date_range(start='2018-01-01', periods=5, @@ -697,7 +697,8 @@ def test_join_on_tz_aware_datetimeindex(self): ) result = df1.join(df2.set_index('date'), on='date') expected = df1.copy() - expected['vals_2'] = pd.Series([np.nan] * len(expected), dtype=object) + expected['vals_2'] = pd.Series([np.nan] * 2 + list('tuv'), + dtype=object) assert_frame_equal(result, expected) From a4444d49ffd01730b7587244c4756ac43ce93993 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 13 May 2019 09:55:26 -0700 Subject: [PATCH 2/2] simplify ndarray creation --- pandas/core/reshape/merge.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index a3a60c08691f3..c21af1c7e820d 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1671,21 +1671,11 @@ def _right_outer_join(x, y, max_groups): } -def _convert_array_or_index(arg): - """Converts DatetimeArray or DatetimeIndex to numpy array in UTC""" - try: - # DatetimeIndex case - return arg._values._data - except AttributeError: - # DatetimeArray Case - return arg._data - - def _factorize_keys(lk, rk, sort=True): # Some pre-processing for non-ndarray lk / rk if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): - lk = _convert_array_or_index(lk) - rk = _convert_array_or_index(rk) + lk = getattr(lk, '_values', lk)._data + rk = getattr(rk, '_values', rk)._data elif (is_categorical_dtype(lk) and is_categorical_dtype(rk) and