From 9e7c26c49594a0be14e6f5a4a410f291146bfbc7 Mon Sep 17 00:00:00 2001
From: Matt Roeschke
Date: Fri, 5 Jul 2019 00:13:02 -0700
Subject: [PATCH] BUG: merge_asof with multiple by columns with tz

---
 doc/source/whatsnew/v0.25.0.rst               |  1 +
 pandas/core/reshape/merge.py                  |  3 ++
 pandas/tests/reshape/merge/test_merge_asof.py | 32 +++++++++++++++++--
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index ab242ece98181..101addfa097f8 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -1151,6 +1151,7 @@ Reshaping
 - Bug in :func:`DataFrame.pivot_table` with a :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`)
 - Bug in :meth:`DataFrame.transpose` where transposing a DataFrame with a timezone-aware datetime column would incorrectly raise ``ValueError`` (:issue:`26825`)
 - Bug in :func:`pivot_table` when pivoting a timezone aware column as the ``values`` would remove timezone information (:issue:`14948`)
+- Bug in :func:`merge_asof` when specifying multiple ``by`` columns where one is ``datetime64[ns, tz]`` dtype (:issue:`26649`)
 
 Sparse
 ^^^^^^
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 4f910f6a278ad..c1a07c129f7cd 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1686,6 +1686,9 @@ def _get_join_indexers(self):
 
         def flip(xs):
             """ unlike np.transpose, this returns an array of tuples """
+            xs = [
+                x if not is_extension_array_dtype(x) else x._ndarray_values for x in xs
+            ]
             labels = list(string.ascii_lowercase[: len(xs)])
             dtypes = [x.dtype for x in xs]
             labeled_dtypes = list(zip(labels, dtypes))
diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py
index e2e17397464fe..6b66386bafc5e 100644
--- a/pandas/tests/reshape/merge/test_merge_asof.py
+++ b/pandas/tests/reshape/merge/test_merge_asof.py
@@ -190,9 +190,9 @@ def test_basic_left_index(self):
         result = merge_asof(
             trades, quotes, left_index=True, right_on="time", by="ticker"
         )
-        # left-only index uses right's index, oddly
+        # left-only index uses right"s index, oddly
         expected.index = result.index
-        # time column appears after left's columns
+        # time column appears after left"s columns
         expected = expected[result.columns]
         assert_frame_equal(result, expected)
 
@@ -233,7 +233,7 @@ def test_multi_index(self):
 
     def test_on_and_index(self):
 
-        # 'on' parameter and index together is prohibited
+        # "on" parameter and index together is prohibited
         trades = self.trades.set_index("time")
         quotes = self.quotes.set_index("time")
         with pytest.raises(MergeError):
@@ -1220,3 +1220,29 @@ def test_merge_by_col_tz_aware(self):
             columns=["by_col", "on_col", "values_x", "values_y"],
         )
         assert_frame_equal(result, expected)
+
+    def test_by_mixed_tz_aware(self):
+        # GH 26649
+        left = pd.DataFrame(
+            {
+                "by_col1": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"),
+                "by_col2": ["HELLO"],
+                "on_col": [2],
+                "value": ["a"],
+            }
+        )
+        right = pd.DataFrame(
+            {
+                "by_col1": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"),
+                "by_col2": ["WORLD"],
+                "on_col": [1],
+                "value": ["b"],
+            }
+        )
+        result = pd.merge_asof(left, right, by=["by_col1", "by_col2"], on="on_col")
+        expected = pd.DataFrame(
+            [[pd.Timestamp("2018-01-01", tz="UTC"), "HELLO", 2, "a"]],
+            columns=["by_col1", "by_col2", "on_col", "value_x"],
+        )
+        expected["value_y"] = np.array([np.nan], dtype=object)
+        assert_frame_equal(result, expected)