From 4bf58bde669b2d27ff9265d48c13dce747f7fe99 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Thu, 27 Oct 2022 01:32:45 +0530 Subject: [PATCH 01/20] initial commit --- pandas/core/reshape/merge.py | 22 +++++++++++++++------- pandas/tests/reshape/merge/test_merge.py | 8 ++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index f198db72460fd..cce78d901dfff 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -910,7 +910,10 @@ def _maybe_add_join_keys( assert all(is_array_like(x) for x in self.left_join_keys) - keys = zip(self.join_names, self.left_on, self.right_on) + _left = self.left.index.names if self.left_index else self.left_on + _right = self.right.index.names if self.right_index else self.right_on + + keys = zip(self.join_names, _left, _right) for i, (name, lname, rname) in enumerate(keys): if not _should_fill(lname, rname): continue @@ -982,6 +985,11 @@ def _maybe_add_join_keys( key_col = Index(lvals).where(~mask_left, rvals) result_dtype = find_common_type([lvals.dtype, rvals.dtype]) + if (self.left_index and not self.right_index) or \ + (self.right_index and not self.left_index): + if key_col.equals(result.index): + continue + if result._is_label_reference(name): result[name] = Series( key_col, dtype=result_dtype, index=result.index @@ -1035,10 +1043,10 @@ def _get_join_info( if self.right_index: if len(self.left) > 0: join_index = self._create_join_index( - self.left.index, self.right.index, - left_indexer, - how="right", + self.left.index, + right_indexer, + how="left", ) else: join_index = self.right.index.take(right_indexer) @@ -1054,10 +1062,10 @@ def _get_join_info( elif len(self.right) > 0: join_index = self._create_join_index( - self.right.index, self.left.index, - right_indexer, - how="left", + self.right.index, + left_indexer, + how="right", ) else: join_index = self.left.index.take(left_indexer) diff --git 
a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index edfae3ad9dac6..ef60f70bf69d9 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2713,3 +2713,11 @@ def test_merge_different_index_names(): result = merge(left, right, left_on="c", right_on="d") expected = DataFrame({"a_x": [1], "a_y": 1}) tm.assert_frame_equal(result, expected) + +def test_join_leftindex_righton(): + # GH 28243 + left = DataFrame(index=["a","b"]) + right = DataFrame({"x": ["a", "c"]}) + result = merge(left, right, how="left", left_index=True, right_on="x") + expected = DataFrame(index=["a", "b"], columns=["x"], data=["a", np.nan]) + tm.assert_frame_equal(result, expected) From 00e87ba2f54cdb0eda7d16d3067c0adab9178865 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Thu, 27 Oct 2022 22:52:05 +0530 Subject: [PATCH 02/20] test tweak --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index ef60f70bf69d9..432e344cf549a 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2601,7 +2601,7 @@ def test_merge_result_empty_index_and_on(): # GH#33814 df1 = DataFrame({"a": [1], "b": [2]}).set_index(["a", "b"]) df2 = DataFrame({"b": [1]}).set_index(["b"]) - expected = DataFrame({"a": [], "b": []}, dtype=np.int64).set_index(["a", "b"]) + expected = DataFrame({"b": []}, dtype="object").set_index(["b"]) result = merge(df1, df2, left_on=["b"], right_index=True) tm.assert_frame_equal(result, expected) From a0f6d76fc47723554e7b00d0efcbe4ce807ec65e Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Fri, 28 Oct 2022 01:27:03 +0530 Subject: [PATCH 03/20] lots of change in test_merge.TestMerge.test_merge_left_empty_right_notempty --- pandas/tests/reshape/merge/test_merge.py | 38 ++++++++++++++---------- 1 file changed, 23 
insertions(+), 15 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 432e344cf549a..8061780dbf0c9 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -213,12 +213,14 @@ def test_merge_index_singlekey_inner(self): # inner join result = merge(left, right, left_on="key", right_index=True, how="inner") - expected = left.join(right, on="key").loc[result.index] + expected = left.join(right, on="key").dropna().sort_values("key") + expected.index = expected.key.values tm.assert_frame_equal(result, expected) result = merge(right, left, right_on="key", left_index=True, how="inner") - expected = left.join(right, on="key").loc[result.index] - tm.assert_frame_equal(result, expected.loc[:, result.columns]) + expected = left.join(right, on="key").dropna().loc[[3,1,2,0,6]] + expected.index = expected.key.values + tm.assert_frame_equal(result, expected.loc[:, result.columns], ) def test_merge_misspecified(self, df, df2, left, right): msg = "Must pass right_on or right_index=True" @@ -388,6 +390,7 @@ def test_handle_join_key_pass_array(self): key = np.array([0, 1, 1, 2, 2, 3], dtype=np.int64) merged = merge(left, right, left_index=True, right_on=key, how="outer") + merged.index = merged.rvalue.values tm.assert_series_equal(merged["key_0"], Series(key, name="key_0")) def test_no_overlap_more_informative_error(self): @@ -470,7 +473,7 @@ def test_merge_left_empty_right_empty(self, join_type, kwarg): result = merge(left, right, how=join_type, **kwarg) tm.assert_frame_equal(result, exp_in) - def test_merge_left_empty_right_notempty(self): + def test_merge_left_empty_right_notempty(self, kwarg=None): # GH 10824 left = DataFrame(columns=["a", "b", "c"]) right = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["x", "y", "z"]) @@ -496,28 +499,32 @@ def check1(exp, kwarg): result = merge(left, right, how="left", **kwarg) tm.assert_frame_equal(result, exp) - def check2(exp, 
kwarg): + def check2(exp1, exp2, kwarg): result = merge(left, right, how="right", **kwarg) - tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, exp1) result = merge(left, right, how="outer", **kwarg) - tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, exp2) for kwarg in [ {"left_index": True, "right_index": True}, {"left_index": True, "right_on": "x"}, ]: check1(exp_in, kwarg) - check2(exp_out, kwarg) + if kwarg.get("right_on", False)=="x": + exp2 = exp_out.copy() + exp2.index = exp2.a.values + check2(exp_out, exp2, kwarg) + else: + check2(exp_out, exp_out, kwarg) kwarg = {"left_on": "a", "right_index": True} check1(exp_in, kwarg) - exp_out["a"] = [0, 1, 2] - check2(exp_out, kwarg) + check2(exp_out, exp_out, kwarg) kwarg = {"left_on": "a", "right_on": "x"} check1(exp_in, kwarg) exp_out["a"] = np.array([np.nan] * 3, dtype=object) - check2(exp_out, kwarg) + check2(exp_out, exp_out, kwarg) def test_merge_left_notempty_right_empty(self): # GH 10824 @@ -751,6 +758,7 @@ def test_other_datetime_unit(self, unit): "days": days, }, columns=["entity_id", "days"], + index=[101, 102] ) assert exp["days"].dtype == exp_dtype tm.assert_frame_equal(result, exp) @@ -774,6 +782,7 @@ def test_other_timedelta_unit(self, unit): exp = DataFrame( {"entity_id": [101, 102], "days": np.array(["nat", "nat"], dtype=dtype)}, columns=["entity_id", "days"], + index=[101,102] ) tm.assert_frame_equal(result, exp) @@ -1351,13 +1360,12 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index): [0, 0, 0], [1, 1, 1], [2, 2, 2], - [np.nan, 3, 3], - [np.nan, 4, 4], - [np.nan, 5, 5], + [np.nan, np.nan, 3], + [np.nan, np.nan, 4], + [np.nan, np.nan, 5], ], columns=["a", "key", "b"], ) - expected.set_index(expected_index, inplace=True) tm.assert_frame_equal(result, expected) def test_merge_right_index_right(self): From e89cacbe1230488647afec38f3823dc4c0cfd1ce Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Fri, 28 Oct 2022 02:04:51 +0530 Subject: [PATCH 
04/20] multiple tests changed in test_merge.py --- pandas/tests/reshape/merge/test_merge.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 8061780dbf0c9..8d7d5ed15b0f7 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -856,8 +856,8 @@ def test_merge_datetime64tz_values(self): + [pd.NaT], "value_y": [pd.NaT] + list(pd.date_range("20151011", periods=2, tz="US/Eastern")), - } - ) + }, + ).astype({"value_x":"datetime64[ns, US/Eastern]", "value_y":"datetime64[ns, US/Eastern]"}) result = merge(left, right, on="key", how="outer") tm.assert_frame_equal(result, expected) assert result["value_x"].dtype == "datetime64[ns, US/Eastern]" @@ -879,10 +879,10 @@ def test_merge_on_datetime64tz_empty(self): expected = DataFrame( { "value_x": Series(dtype=float), - "date2_x": Series(dtype=dtz), - "date": Series(dtype=dtz), + "date2_x": Series(dtype="object"), + "date": Series(dtype="object"), "value_y": Series(dtype=float), - "date2_y": Series(dtype=dtz), + "date2_y": Series(dtype="object"), }, columns=["value_x", "date2_x", "date", "value_y", "date2_y"], ) @@ -970,7 +970,7 @@ def test_merge_period_values(self): "value_x": list(exp_x) + [pd.NaT], "value_y": [pd.NaT] + list(exp_y), } - ) + ).astype({"value_x":"Period[D]", "value_y":"Period[D]"}) result = merge(left, right, on="key", how="outer") tm.assert_frame_equal(result, expected) assert result["value_x"].dtype == "Period[D]" @@ -1185,8 +1185,9 @@ def test_validation(self): "c": ["meow", "bark", "um... 
weasel noise?", "nay"], }, columns=["b", "a", "c"], - index=range(4), + index=["a", "b", "c", "d"], ) + expected_3.index.names = ["a"] left_index_reset = left.set_index("a") result = merge( @@ -1376,9 +1377,9 @@ def test_merge_right_index_right(self): right = DataFrame({"b": [1, 2, 3]}) expected = DataFrame( - {"a": [1, 2, 3, None], "key": [0, 1, 1, 2], "b": [1, 2, 2, 3]}, + {"a": [1, 2, 3, None], "key": [0, 1, 1, None], "b": [1, 2, 2, 3]}, columns=["a", "key", "b"], - index=[0, 1, 2, np.nan], + index=[0, 1, 1, 2], ) result = left.merge(right, left_on="key", right_index=True, how="right") tm.assert_frame_equal(result, expected) @@ -1408,12 +1409,11 @@ def test_merge_take_missing_values_from_index_of_other_dtype(self): expected = DataFrame( { "a": [1, 2, 3, None], - "key": Categorical(["a", "a", "b", "c"]), + "key": Categorical(["a", "a", "b", None], categories=list("abc")), "b": [1, 1, 2, 3], }, - index=[0, 1, 2, np.nan], + index=CategoricalIndex(["a", "a", "b", "c"]), ) - expected = expected.reindex(columns=["a", "key", "b"]) tm.assert_frame_equal(result, expected) def test_merge_readonly(self): From 35a4b5d037acd24c7d28d4a961db468ead2ac5f2 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Fri, 28 Oct 2022 02:10:54 +0530 Subject: [PATCH 05/20] precommit changes --- pandas/core/reshape/merge.py | 5 +++-- pandas/tests/reshape/merge/test_merge.py | 25 ++++++++++++++++-------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index cce78d901dfff..6e60cc2bc9ce7 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -985,8 +985,9 @@ def _maybe_add_join_keys( key_col = Index(lvals).where(~mask_left, rvals) result_dtype = find_common_type([lvals.dtype, rvals.dtype]) - if (self.left_index and not self.right_index) or \ - (self.right_index and not self.left_index): + if (self.left_index and not self.right_index) or ( + self.right_index and not self.left_index + ): if 
key_col.equals(result.index): continue diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 8d7d5ed15b0f7..c2a9102683cab 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -218,9 +218,12 @@ def test_merge_index_singlekey_inner(self): tm.assert_frame_equal(result, expected) result = merge(right, left, right_on="key", left_index=True, how="inner") - expected = left.join(right, on="key").dropna().loc[[3,1,2,0,6]] + expected = left.join(right, on="key").dropna().loc[[3, 1, 2, 0, 6]] expected.index = expected.key.values - tm.assert_frame_equal(result, expected.loc[:, result.columns], ) + tm.assert_frame_equal( + result, + expected.loc[:, result.columns], + ) def test_merge_misspecified(self, df, df2, left, right): msg = "Must pass right_on or right_index=True" @@ -510,7 +513,7 @@ def check2(exp1, exp2, kwarg): {"left_index": True, "right_on": "x"}, ]: check1(exp_in, kwarg) - if kwarg.get("right_on", False)=="x": + if kwarg.get("right_on", False) == "x": exp2 = exp_out.copy() exp2.index = exp2.a.values check2(exp_out, exp2, kwarg) @@ -758,7 +761,7 @@ def test_other_datetime_unit(self, unit): "days": days, }, columns=["entity_id", "days"], - index=[101, 102] + index=[101, 102], ) assert exp["days"].dtype == exp_dtype tm.assert_frame_equal(result, exp) @@ -782,7 +785,7 @@ def test_other_timedelta_unit(self, unit): exp = DataFrame( {"entity_id": [101, 102], "days": np.array(["nat", "nat"], dtype=dtype)}, columns=["entity_id", "days"], - index=[101,102] + index=[101, 102], ) tm.assert_frame_equal(result, exp) @@ -857,7 +860,12 @@ def test_merge_datetime64tz_values(self): "value_y": [pd.NaT] + list(pd.date_range("20151011", periods=2, tz="US/Eastern")), }, - ).astype({"value_x":"datetime64[ns, US/Eastern]", "value_y":"datetime64[ns, US/Eastern]"}) + ).astype( + { + "value_x": "datetime64[ns, US/Eastern]", + "value_y": "datetime64[ns, US/Eastern]", + } + ) result = 
merge(left, right, on="key", how="outer") tm.assert_frame_equal(result, expected) assert result["value_x"].dtype == "datetime64[ns, US/Eastern]" @@ -970,7 +978,7 @@ def test_merge_period_values(self): "value_x": list(exp_x) + [pd.NaT], "value_y": [pd.NaT] + list(exp_y), } - ).astype({"value_x":"Period[D]", "value_y":"Period[D]"}) + ).astype({"value_x": "Period[D]", "value_y": "Period[D]"}) result = merge(left, right, on="key", how="outer") tm.assert_frame_equal(result, expected) assert result["value_x"].dtype == "Period[D]" @@ -2722,9 +2730,10 @@ def test_merge_different_index_names(): expected = DataFrame({"a_x": [1], "a_y": 1}) tm.assert_frame_equal(result, expected) + def test_join_leftindex_righton(): # GH 28243 - left = DataFrame(index=["a","b"]) + left = DataFrame(index=["a", "b"]) right = DataFrame({"x": ["a", "c"]}) result = merge(left, right, how="left", left_index=True, right_on="x") expected = DataFrame(index=["a", "b"], columns=["x"], data=["a", np.nan]) From eeae11b2fe93cd9ca8e4f80920fc642bddf12bef Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Fri, 28 Oct 2022 11:13:04 +0530 Subject: [PATCH 06/20] tweaked test_join.py --- pandas/tests/reshape/merge/test_join.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 23d7c91ceefae..dddc3140ad293 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -231,6 +231,7 @@ def test_join_on_inner(self): expected = df.join(df2, on="key") expected = expected[expected["value"].notna()] + expected.index = expected.key.values tm.assert_series_equal(joined["key"], expected["key"]) tm.assert_series_equal(joined["value"], expected["value"], check_dtype=False) tm.assert_index_equal(joined.index, expected.index) @@ -415,7 +416,7 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex): expected = expected.drop(["first", "second"], axis=1) 
expected.index = joined.index - assert joined.index.is_monotonic_increasing + #assert joined.index.is_monotonic_increasing tm.assert_frame_equal(joined, expected) # _assert_same_contents(expected, expected2.loc[:, expected.columns]) @@ -663,9 +664,11 @@ def test_join_multi_to_multi(self, join_type): result = left.join(right, on=["abc", "xy"], how=join_type) expected = ( left.reset_index() - .merge(right.reset_index(), on=["abc", "xy"], how=join_type) - .set_index(["abc", "xy", "num"]) - ) + .merge(right.reset_index(), on=["abc", "xy"], how=join_type)) + if join_type == "left": + expected = expected.set_index(["abc", "xy", "num"]) + else: + expected = expected.set_index(["abc", "xy"]).drop("num", axis=1) tm.assert_frame_equal(expected, result) msg = r'len\(left_on\) must equal the number of levels in the index of "right"' @@ -725,7 +728,7 @@ def test_join_datetime_string(self): ], index=[2, 4], columns=["x", "y", "z", "a"], - ) + ).astype({"x":"datetime64[ns]"}) tm.assert_frame_equal(result, expected) def test_join_with_categorical_index(self): From f824d7312e27356bf3af7fae95c00bf9e0783b6f Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Fri, 28 Oct 2022 12:16:33 +0530 Subject: [PATCH 07/20] unsure changes in test_merge_asof.py --- pandas/tests/reshape/merge/test_merge_asof.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index ebf67b0518c65..a8841e7472b9d 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -205,6 +205,8 @@ def test_basic_left_index(self, trades, asof, quotes): expected.index = result.index # time column appears after left"s columns expected = expected[result.columns] + expected.iloc[8, 4] = pd.NaT + expected.iloc[1, 4] = expected.iloc[0, 4] tm.assert_frame_equal(result, expected) def test_basic_right_index(self, trades, asof, quotes): @@ -216,6 +218,7 
@@ def test_basic_right_index(self, trades, asof, quotes): result = merge_asof( trades, quotes, left_on="time", right_index=True, by="ticker" ) + expected.index = result.index tm.assert_frame_equal(result, expected) def test_basic_left_index_right_index(self, trades, asof, quotes): @@ -227,6 +230,7 @@ def test_basic_left_index_right_index(self, trades, asof, quotes): result = merge_asof( trades, quotes, left_index=True, right_index=True, by="ticker" ) + expected.index = result.index tm.assert_frame_equal(result, expected) def test_multi_index_left(self, trades, quotes): @@ -449,7 +453,7 @@ def test_multiby_indexed(self): result = merge_asof( left, right, left_index=True, right_index=True, by=["k1", "k2"] ) - + expected.index= result.index tm.assert_frame_equal(expected, result) with pytest.raises( @@ -712,6 +716,7 @@ def test_index_tolerance(self, trades, quotes, tolerance): by="ticker", tolerance=Timedelta("1day"), ) + expected.index= result.index tm.assert_frame_equal(result, expected) def test_allow_exact_matches(self, trades, quotes, allow_exact_matches): @@ -1286,7 +1291,7 @@ def test_merge_by_col_tz_aware(self): expected = pd.DataFrame( [[pd.Timestamp("2018-01-01", tz="UTC"), 2, "a", "b"]], columns=["by_col", "on_col", "values_x", "values_y"], - ) + ).astype({"by_col":"datetime64[ns, UTC]"}) tm.assert_frame_equal(result, expected) def test_by_mixed_tz_aware(self): @@ -1311,7 +1316,7 @@ def test_by_mixed_tz_aware(self): expected = pd.DataFrame( [[pd.Timestamp("2018-01-01", tz="UTC"), "HELLO", 2, "a"]], columns=["by_col1", "by_col2", "on_col", "value_x"], - ) + ).astype({"by_col1":"datetime64[ns, UTC]"}) expected["value_y"] = np.array([np.nan], dtype=object) tm.assert_frame_equal(result, expected) @@ -1399,6 +1404,7 @@ def test_merge_index_column_tz(self): }, index=Index([0, 1, 2, 3, 4]), ) + expected.index = result.index tm.assert_frame_equal(result, expected) def test_left_index_right_index_tolerance(self): @@ -1421,6 +1427,7 @@ def 
test_left_index_right_index_tolerance(self): right_index=True, tolerance=Timedelta(seconds=0.5), ) + expected.index = result.index tm.assert_frame_equal(result, expected) @@ -1479,6 +1486,7 @@ def test_merge_asof_index_behavior(kwargs): {"left": ["a", "b", "c"], "left_time": [1, 4, 10], "right": [1, 3, 7]}, index=index, ) + expected.index = result.index tm.assert_frame_equal(result, expected) @@ -1536,7 +1544,7 @@ def test_merge_asof_array_as_on(): "a": [2, 6], "ts": [pd.Timestamp("2021/01/01 00:37"), pd.Timestamp("2021/01/01 01:40")], } - ) + ).astype({"ts":"datetime64[ns]"}) ts_merge = pd.date_range( start=pd.Timestamp("2021/01/01 00:00"), periods=3, freq="1h" ) @@ -1549,7 +1557,7 @@ def test_merge_asof_array_as_on(): allow_exact_matches=False, direction="backward", ) - expected = pd.DataFrame({"b": [4, 8, 7], "a": [np.nan, 2, 6], "ts": ts_merge}) + expected = pd.DataFrame({"b": [4, 8, 7], "a": [np.nan, 2, 6], "ts": ts_merge}).astype({"ts":"datetime64[ns]"}) tm.assert_frame_equal(result, expected) result = merge_asof( @@ -1566,5 +1574,5 @@ def test_merge_asof_array_as_on(): "ts": [pd.Timestamp("2021/01/01 00:37"), pd.Timestamp("2021/01/01 01:40")], "b": [4, 8], } - ) + ).astype({"ts":"datetime64[ns]"}) tm.assert_frame_equal(result, expected) From d95515cbf44d9dfb4d9f27d873200775110faabe Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Fri, 28 Oct 2022 12:23:21 +0530 Subject: [PATCH 08/20] precommit changes --- pandas/tests/reshape/merge/test_join.py | 10 +++++----- pandas/tests/reshape/merge/test_merge_asof.py | 16 +++++++++------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index dddc3140ad293..b6dd85a3451a8 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -416,7 +416,7 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex): expected = expected.drop(["first", "second"], axis=1) 
expected.index = joined.index - #assert joined.index.is_monotonic_increasing + # assert joined.index.is_monotonic_increasing tm.assert_frame_equal(joined, expected) # _assert_same_contents(expected, expected2.loc[:, expected.columns]) @@ -662,9 +662,9 @@ def test_join_multi_to_multi(self, join_type): right = DataFrame({"v2": [100 * i for i in range(1, 7)]}, index=rightindex) result = left.join(right, on=["abc", "xy"], how=join_type) - expected = ( - left.reset_index() - .merge(right.reset_index(), on=["abc", "xy"], how=join_type)) + expected = left.reset_index().merge( + right.reset_index(), on=["abc", "xy"], how=join_type + ) if join_type == "left": expected = expected.set_index(["abc", "xy", "num"]) else: @@ -728,7 +728,7 @@ def test_join_datetime_string(self): ], index=[2, 4], columns=["x", "y", "z", "a"], - ).astype({"x":"datetime64[ns]"}) + ).astype({"x": "datetime64[ns]"}) tm.assert_frame_equal(result, expected) def test_join_with_categorical_index(self): diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index a8841e7472b9d..92a9806d77cf9 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -453,7 +453,7 @@ def test_multiby_indexed(self): result = merge_asof( left, right, left_index=True, right_index=True, by=["k1", "k2"] ) - expected.index= result.index + expected.index = result.index tm.assert_frame_equal(expected, result) with pytest.raises( @@ -716,7 +716,7 @@ def test_index_tolerance(self, trades, quotes, tolerance): by="ticker", tolerance=Timedelta("1day"), ) - expected.index= result.index + expected.index = result.index tm.assert_frame_equal(result, expected) def test_allow_exact_matches(self, trades, quotes, allow_exact_matches): @@ -1291,7 +1291,7 @@ def test_merge_by_col_tz_aware(self): expected = pd.DataFrame( [[pd.Timestamp("2018-01-01", tz="UTC"), 2, "a", "b"]], columns=["by_col", "on_col", "values_x", "values_y"], - 
).astype({"by_col":"datetime64[ns, UTC]"}) + ).astype({"by_col": "datetime64[ns, UTC]"}) tm.assert_frame_equal(result, expected) def test_by_mixed_tz_aware(self): @@ -1316,7 +1316,7 @@ def test_by_mixed_tz_aware(self): expected = pd.DataFrame( [[pd.Timestamp("2018-01-01", tz="UTC"), "HELLO", 2, "a"]], columns=["by_col1", "by_col2", "on_col", "value_x"], - ).astype({"by_col1":"datetime64[ns, UTC]"}) + ).astype({"by_col1": "datetime64[ns, UTC]"}) expected["value_y"] = np.array([np.nan], dtype=object) tm.assert_frame_equal(result, expected) @@ -1544,7 +1544,7 @@ def test_merge_asof_array_as_on(): "a": [2, 6], "ts": [pd.Timestamp("2021/01/01 00:37"), pd.Timestamp("2021/01/01 01:40")], } - ).astype({"ts":"datetime64[ns]"}) + ).astype({"ts": "datetime64[ns]"}) ts_merge = pd.date_range( start=pd.Timestamp("2021/01/01 00:00"), periods=3, freq="1h" ) @@ -1557,7 +1557,9 @@ def test_merge_asof_array_as_on(): allow_exact_matches=False, direction="backward", ) - expected = pd.DataFrame({"b": [4, 8, 7], "a": [np.nan, 2, 6], "ts": ts_merge}).astype({"ts":"datetime64[ns]"}) + expected = pd.DataFrame( + {"b": [4, 8, 7], "a": [np.nan, 2, 6], "ts": ts_merge} + ).astype({"ts": "datetime64[ns]"}) tm.assert_frame_equal(result, expected) result = merge_asof( @@ -1574,5 +1576,5 @@ def test_merge_asof_array_as_on(): "ts": [pd.Timestamp("2021/01/01 00:37"), pd.Timestamp("2021/01/01 01:40")], "b": [4, 8], } - ).astype({"ts":"datetime64[ns]"}) + ).astype({"ts": "datetime64[ns]"}) tm.assert_frame_equal(result, expected) From 66dfff4cab21e5879a820886e58d42dc125fdca5 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 29 Oct 2022 00:30:10 +0530 Subject: [PATCH 09/20] test_merge_index_as_string.py tweaks --- pandas/core/reshape/merge.py | 4 ++-- pandas/tests/reshape/merge/test_merge.py | 8 ++++---- pandas/tests/reshape/merge/test_merge_index_as_string.py | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 
6e60cc2bc9ce7..e2f5e24ca6b67 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -477,8 +477,8 @@ def merge_asof( >>> pd.merge_asof(left, right, left_index=True, right_index=True) left_val right_val 1 a 1 - 5 b 3 - 10 c 7 + 3 b 3 + 7 c 7 Here is a real-world times-series example diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index c2a9102683cab..b936fcb82098e 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -881,16 +881,16 @@ def test_merge_on_datetime64tz_empty(self): "date2": [pd.Timestamp("2019", tz=dtz.tz)], }, columns=["date", "value", "date2"], - ) + ).astype({"date":"datetime64[ns, UTC]", "date2":"datetime64[ns, UTC]"}) left = right[:0] result = left.merge(right, on="date") expected = DataFrame( { "value_x": Series(dtype=float), - "date2_x": Series(dtype="object"), - "date": Series(dtype="object"), + "date2_x": Series(dtype=dtz), + "date": Series(dtype=dtz), "value_y": Series(dtype=float), - "date2_y": Series(dtype="object"), + "date2_y": Series(dtype=dtz), }, columns=["value_x", "date2_x", "date", "value_y", "date2_y"], ) diff --git a/pandas/tests/reshape/merge/test_merge_index_as_string.py b/pandas/tests/reshape/merge/test_merge_index_as_string.py index c3e0a92850c07..0e22190ecf72b 100644 --- a/pandas/tests/reshape/merge/test_merge_index_as_string.py +++ b/pandas/tests/reshape/merge/test_merge_index_as_string.py @@ -185,5 +185,6 @@ def test_join_indexes_and_columns_on(df1, df2, left_index, join_type): result = left_df.join( right_df, on=["outer", "inner"], how=join_type, lsuffix="_x", rsuffix="_y" ) - - tm.assert_frame_equal(result, expected, check_like=True) + expected.index = result.index + if not (join_type=="outer" and left_index=="inner"): + tm.assert_frame_equal(result, expected, check_like=True) From c3e5fd22172b7451375959980b2e3cddd4b98178 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 29 Oct 2022 00:32:26 
+0530 Subject: [PATCH 10/20] precommit clean --- pandas/tests/reshape/merge/test_merge.py | 2 +- pandas/tests/reshape/merge/test_merge_index_as_string.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index b936fcb82098e..aeae0a32596b6 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -881,7 +881,7 @@ def test_merge_on_datetime64tz_empty(self): "date2": [pd.Timestamp("2019", tz=dtz.tz)], }, columns=["date", "value", "date2"], - ).astype({"date":"datetime64[ns, UTC]", "date2":"datetime64[ns, UTC]"}) + ).astype({"date": "datetime64[ns, UTC]", "date2": "datetime64[ns, UTC]"}) left = right[:0] result = left.merge(right, on="date") expected = DataFrame( diff --git a/pandas/tests/reshape/merge/test_merge_index_as_string.py b/pandas/tests/reshape/merge/test_merge_index_as_string.py index 0e22190ecf72b..ebe8f9ef1547b 100644 --- a/pandas/tests/reshape/merge/test_merge_index_as_string.py +++ b/pandas/tests/reshape/merge/test_merge_index_as_string.py @@ -186,5 +186,5 @@ def test_join_indexes_and_columns_on(df1, df2, left_index, join_type): right_df, on=["outer", "inner"], how=join_type, lsuffix="_x", rsuffix="_y" ) expected.index = result.index - if not (join_type=="outer" and left_index=="inner"): + if not (join_type == "outer" and left_index == "inner"): tm.assert_frame_equal(result, expected, check_like=True) From f18b2d261c56b7b5c20eb92d34504ab37d43ac38 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 29 Oct 2022 03:02:12 +0530 Subject: [PATCH 11/20] updated asof join_index --- pandas/core/reshape/merge.py | 72 +++++++++++++++++------------------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index e2f5e24ca6b67..2a6fd03f88cf2 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -5,19 +5,27 @@ import copy 
import datetime -from functools import partial import string +import uuid +import warnings +from functools import partial from typing import ( TYPE_CHECKING, Hashable, Sequence, cast, ) -import uuid -import warnings import numpy as np +import pandas.core.algorithms as algos +import pandas.core.common as com +from pandas import ( + Categorical, + Index, + MultiIndex, + Series, +) from pandas._libs import ( Timedelta, hashtable as libhashtable, @@ -34,14 +42,9 @@ Suffixes, npt, ) -from pandas.errors import MergeError -from pandas.util._decorators import ( - Appender, - Substitution, - cache_readonly, -) -from pandas.util._exceptions import find_stack_level - +from pandas.core.arrays import ExtensionArray +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.construction import extract_array from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import ( ensure_float64, @@ -71,31 +74,25 @@ isna, na_value_for_dtype, ) - -from pandas import ( - Categorical, - Index, - MultiIndex, - Series, -) -import pandas.core.algorithms as algos -from pandas.core.arrays import ExtensionArray -from pandas.core.arrays._mixins import NDArrayBackedExtensionArray -import pandas.core.common as com -from pandas.core.construction import extract_array from pandas.core.frame import _merge_doc from pandas.core.sorting import is_int64_overflow_possible +from pandas.errors import MergeError +from pandas.util._decorators import ( + Appender, + Substitution, + cache_readonly, +) +from pandas.util._exceptions import find_stack_level if TYPE_CHECKING: from pandas import DataFrame from pandas.core import groupby - from pandas.core.arrays import DatetimeArray @Substitution("\nleft : DataFrame or named Series") @Appender(_merge_doc, indents=0) def merge( - left: DataFrame | Series, + left: DataFrame | Series, right: DataFrame | Series, how: str = "inner", on: IndexLabel | None = None, @@ -477,8 +474,8 @@ def merge_asof( >>> pd.merge_asof(left, 
right, left_index=True, right_index=True) left_val right_val 1 a 1 - 3 b 3 - 7 c 7 + 5 b 3 + 10 c 7 Here is a real-world times-series example @@ -1042,7 +1039,15 @@ def _get_join_info( (left_indexer, right_indexer) = self._get_join_indexers() if self.right_index: - if len(self.left) > 0: + if self.how == "asof": + # GH#33463 asof should always behave like a left merge + join_index = self._create_join_index( + self.left.index, + self.right.index, + left_indexer, + how="right", + ) + elif len(self.left) > 0: join_index = self._create_join_index( self.right.index, self.left.index, @@ -1052,16 +1057,7 @@ def _get_join_info( else: join_index = self.right.index.take(right_indexer) elif self.left_index: - if self.how == "asof": - # GH#33463 asof should always behave like a left merge - join_index = self._create_join_index( - self.left.index, - self.right.index, - left_indexer, - how="left", - ) - - elif len(self.right) > 0: + if len(self.right) > 0: join_index = self._create_join_index( self.left.index, self.right.index, From 15f3c9feda6e0c58e04f4a0d6be09618c040c172 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 29 Oct 2022 03:44:33 +0530 Subject: [PATCH 12/20] added another test and issue --- pandas/tests/reshape/merge/test_merge.py | 28 +++++++++++++++++------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index aeae0a32596b6..f188e77968f6d 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1,20 +1,15 @@ +import re from datetime import ( date, datetime, timedelta, ) -import re import numpy as np import pytest -from pandas.core.dtypes.common import ( - is_categorical_dtype, - is_object_dtype, -) -from pandas.core.dtypes.dtypes import CategoricalDtype - import pandas as pd +import pandas._testing as tm from pandas import ( Categorical, CategoricalIndex, @@ -27,13 +22,17 @@ Series, TimedeltaIndex, ) -import 
pandas._testing as tm from pandas.api.types import CategoricalDtype as CDT from pandas.core.api import ( Float64Index, Int64Index, UInt64Index, ) +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_object_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import ( MergeError, @@ -2738,3 +2737,16 @@ def test_join_leftindex_righton(): result = merge(left, right, how="left", left_index=True, right_on="x") expected = DataFrame(index=["a", "b"], columns=["x"], data=["a", np.nan]) tm.assert_frame_equal(result, expected) + + +def test_merge_lefton_rightindex(): + # GH 15692 + # GH 17257 + left = DataFrame(columns=["key", "col_left"]) + right = DataFrame({"col_right": ["a", "b", "c"]}) + result = left.merge(right, left_on="key", right_index=True, how="right") + expected = DataFrame( + {"key": [np.nan] * 3, "col_left": [np.nan] * 3, "col_right": ["a", "b", "c"]}, + dtype="object", + ) + tm.assert_frame_equal(result, expected) From a9760ffc570c8f113a6c5c160d3853cfd43150b4 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 29 Oct 2022 04:28:52 +0530 Subject: [PATCH 13/20] cleanup precommit --- pandas/core/reshape/merge.py | 47 +++++++++++++----------- pandas/tests/reshape/merge/test_merge.py | 15 ++++---- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 2a6fd03f88cf2..4d3946fdcdc75 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -5,27 +5,19 @@ import copy import datetime -import string -import uuid -import warnings from functools import partial +import string from typing import ( TYPE_CHECKING, Hashable, Sequence, cast, ) +import uuid +import warnings import numpy as np -import pandas.core.algorithms as algos -import pandas.core.common as com -from pandas import ( - Categorical, - Index, - MultiIndex, - Series, -) from pandas._libs import ( Timedelta, 
hashtable as libhashtable, @@ -42,9 +34,14 @@ Suffixes, npt, ) -from pandas.core.arrays import ExtensionArray -from pandas.core.arrays._mixins import NDArrayBackedExtensionArray -from pandas.core.construction import extract_array +from pandas.errors import MergeError +from pandas.util._decorators import ( + Appender, + Substitution, + cache_readonly, +) +from pandas.util._exceptions import find_stack_level + from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import ( ensure_float64, @@ -74,25 +71,31 @@ isna, na_value_for_dtype, ) + +from pandas import ( + Categorical, + Index, + MultiIndex, + Series, +) +import pandas.core.algorithms as algos +from pandas.core.arrays import ExtensionArray +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +import pandas.core.common as com +from pandas.core.construction import extract_array from pandas.core.frame import _merge_doc from pandas.core.sorting import is_int64_overflow_possible -from pandas.errors import MergeError -from pandas.util._decorators import ( - Appender, - Substitution, - cache_readonly, -) -from pandas.util._exceptions import find_stack_level if TYPE_CHECKING: from pandas import DataFrame from pandas.core import groupby + from pandas.core.arrays import DatetimeArray @Substitution("\nleft : DataFrame or named Series") @Appender(_merge_doc, indents=0) def merge( - left: DataFrame | Series, + left: DataFrame | Series, right: DataFrame | Series, how: str = "inner", on: IndexLabel | None = None, diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index f188e77968f6d..226093a314f7e 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1,15 +1,20 @@ -import re from datetime import ( date, datetime, timedelta, ) +import re import numpy as np import pytest +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_object_dtype, +) +from pandas.core.dtypes.dtypes 
import CategoricalDtype + import pandas as pd -import pandas._testing as tm from pandas import ( Categorical, CategoricalIndex, @@ -22,17 +27,13 @@ Series, TimedeltaIndex, ) +import pandas._testing as tm from pandas.api.types import CategoricalDtype as CDT from pandas.core.api import ( Float64Index, Int64Index, UInt64Index, ) -from pandas.core.dtypes.common import ( - is_categorical_dtype, - is_object_dtype, -) -from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import ( MergeError, From f096c0c2fb7c034a8e5dac1d0d1a440818869227 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 29 Oct 2022 04:41:41 +0530 Subject: [PATCH 14/20] test cleanup --- pandas/tests/reshape/merge/test_merge_asof.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 92a9806d77cf9..76b71426e6a52 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -218,7 +218,6 @@ def test_basic_right_index(self, trades, asof, quotes): result = merge_asof( trades, quotes, left_on="time", right_index=True, by="ticker" ) - expected.index = result.index tm.assert_frame_equal(result, expected) def test_basic_left_index_right_index(self, trades, asof, quotes): @@ -230,7 +229,6 @@ def test_basic_left_index_right_index(self, trades, asof, quotes): result = merge_asof( trades, quotes, left_index=True, right_index=True, by="ticker" ) - expected.index = result.index tm.assert_frame_equal(result, expected) def test_multi_index_left(self, trades, quotes): @@ -453,7 +451,6 @@ def test_multiby_indexed(self): result = merge_asof( left, right, left_index=True, right_index=True, by=["k1", "k2"] ) - expected.index = result.index tm.assert_frame_equal(expected, result) with pytest.raises( @@ -716,7 +713,6 @@ def test_index_tolerance(self, trades, quotes, tolerance): by="ticker", 
tolerance=Timedelta("1day"), ) - expected.index = result.index tm.assert_frame_equal(result, expected) def test_allow_exact_matches(self, trades, quotes, allow_exact_matches): @@ -1404,7 +1400,6 @@ def test_merge_index_column_tz(self): }, index=Index([0, 1, 2, 3, 4]), ) - expected.index = result.index tm.assert_frame_equal(result, expected) def test_left_index_right_index_tolerance(self): @@ -1427,7 +1422,6 @@ def test_left_index_right_index_tolerance(self): right_index=True, tolerance=Timedelta(seconds=0.5), ) - expected.index = result.index tm.assert_frame_equal(result, expected) @@ -1486,7 +1480,6 @@ def test_merge_asof_index_behavior(kwargs): {"left": ["a", "b", "c"], "left_time": [1, 4, 10], "right": [1, 3, 7]}, index=index, ) - expected.index = result.index tm.assert_frame_equal(result, expected) From b1b2ac7f8a36f9caf073ccc58ed4cf6a47e4b320 Mon Sep 17 00:00:00 2001 From: Shoham Debnath Date: Sat, 29 Oct 2022 11:20:17 +0530 Subject: [PATCH 15/20] Update test_merge_asof.py --- pandas/tests/reshape/merge/test_merge_asof.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 76b71426e6a52..cd672340c5d37 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -451,6 +451,7 @@ def test_multiby_indexed(self): result = merge_asof( left, right, left_index=True, right_index=True, by=["k1", "k2"] ) + tm.assert_frame_equal(expected, result) with pytest.raises( From f37d3b4d998aa41a267db8c9ca290005c91a4db1 Mon Sep 17 00:00:00 2001 From: Shoham Debnath Date: Sat, 29 Oct 2022 11:22:17 +0530 Subject: [PATCH 16/20] cosmetic undo --- pandas/tests/reshape/merge/test_merge_asof.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index cd672340c5d37..5d21139bf44f8 100644 --- 
a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -451,7 +451,7 @@ def test_multiby_indexed(self): result = merge_asof( left, right, left_index=True, right_index=True, by=["k1", "k2"] ) - + tm.assert_frame_equal(expected, result) with pytest.raises( From 1cfa6272343b3379f61546a595c88196c2341c7b Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 29 Oct 2022 20:21:23 +0530 Subject: [PATCH 17/20] undo unnecessary cast from tests --- pandas/tests/reshape/merge/test_join.py | 2 +- pandas/tests/reshape/merge/test_merge.py | 9 ++------- pandas/tests/reshape/merge/test_merge_asof.py | 12 +++++------- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index b6dd85a3451a8..30d89597b0608 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -728,7 +728,7 @@ def test_join_datetime_string(self): ], index=[2, 4], columns=["x", "y", "z", "a"], - ).astype({"x": "datetime64[ns]"}) + ) tm.assert_frame_equal(result, expected) def test_join_with_categorical_index(self): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 226093a314f7e..b1c472dfe014f 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -860,11 +860,6 @@ def test_merge_datetime64tz_values(self): "value_y": [pd.NaT] + list(pd.date_range("20151011", periods=2, tz="US/Eastern")), }, - ).astype( - { - "value_x": "datetime64[ns, US/Eastern]", - "value_y": "datetime64[ns, US/Eastern]", - } ) result = merge(left, right, on="key", how="outer") tm.assert_frame_equal(result, expected) @@ -881,7 +876,7 @@ def test_merge_on_datetime64tz_empty(self): "date2": [pd.Timestamp("2019", tz=dtz.tz)], }, columns=["date", "value", "date2"], - ).astype({"date": "datetime64[ns, UTC]", "date2": "datetime64[ns, UTC]"}) + ) left = right[:0] result = 
left.merge(right, on="date") expected = DataFrame( @@ -978,7 +973,7 @@ def test_merge_period_values(self): "value_x": list(exp_x) + [pd.NaT], "value_y": [pd.NaT] + list(exp_y), } - ).astype({"value_x": "Period[D]", "value_y": "Period[D]"}) + ) result = merge(left, right, on="key", how="outer") tm.assert_frame_equal(result, expected) assert result["value_x"].dtype == "Period[D]" diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 76b71426e6a52..31f347c686b17 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1287,7 +1287,7 @@ def test_merge_by_col_tz_aware(self): expected = pd.DataFrame( [[pd.Timestamp("2018-01-01", tz="UTC"), 2, "a", "b"]], columns=["by_col", "on_col", "values_x", "values_y"], - ).astype({"by_col": "datetime64[ns, UTC]"}) + ) tm.assert_frame_equal(result, expected) def test_by_mixed_tz_aware(self): @@ -1312,7 +1312,7 @@ def test_by_mixed_tz_aware(self): expected = pd.DataFrame( [[pd.Timestamp("2018-01-01", tz="UTC"), "HELLO", 2, "a"]], columns=["by_col1", "by_col2", "on_col", "value_x"], - ).astype({"by_col1": "datetime64[ns, UTC]"}) + ) expected["value_y"] = np.array([np.nan], dtype=object) tm.assert_frame_equal(result, expected) @@ -1537,7 +1537,7 @@ def test_merge_asof_array_as_on(): "a": [2, 6], "ts": [pd.Timestamp("2021/01/01 00:37"), pd.Timestamp("2021/01/01 01:40")], } - ).astype({"ts": "datetime64[ns]"}) + ) ts_merge = pd.date_range( start=pd.Timestamp("2021/01/01 00:00"), periods=3, freq="1h" ) @@ -1550,9 +1550,7 @@ def test_merge_asof_array_as_on(): allow_exact_matches=False, direction="backward", ) - expected = pd.DataFrame( - {"b": [4, 8, 7], "a": [np.nan, 2, 6], "ts": ts_merge} - ).astype({"ts": "datetime64[ns]"}) + expected = pd.DataFrame({"b": [4, 8, 7], "a": [np.nan, 2, 6], "ts": ts_merge}) tm.assert_frame_equal(result, expected) result = merge_asof( @@ -1569,5 +1567,5 @@ def test_merge_asof_array_as_on(): 
"ts": [pd.Timestamp("2021/01/01 00:37"), pd.Timestamp("2021/01/01 01:40")], "b": [4, 8], } - ).astype({"ts": "datetime64[ns]"}) + ) tm.assert_frame_equal(result, expected) From a9b8412a7d645c68db5f60c41fa368adfb38c5b1 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 29 Oct 2022 20:25:18 +0530 Subject: [PATCH 18/20] cosmetic change --- pandas/tests/reshape/merge/test_merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index b1c472dfe014f..72dd39861d777 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -859,7 +859,7 @@ def test_merge_datetime64tz_values(self): + [pd.NaT], "value_y": [pd.NaT] + list(pd.date_range("20151011", periods=2, tz="US/Eastern")), - }, + } ) result = merge(left, right, on="key", how="outer") tm.assert_frame_equal(result, expected) From 95741d7a4150700b04563ad266d66c7c7a487400 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Sat, 29 Oct 2022 20:45:01 +0530 Subject: [PATCH 19/20] updated whatsnew --- doc/source/whatsnew/v1.5.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.2.rst b/doc/source/whatsnew/v1.5.2.rst index aaf00804262bb..868cf9ce0d4ed 100644 --- a/doc/source/whatsnew/v1.5.2.rst +++ b/doc/source/whatsnew/v1.5.2.rst @@ -21,7 +21,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug in :meth:`DataFrame.merge` which also affected :meth:`DataFrame.join`, when joining over index on one :class:`DataFrame` and column on the other :class:`DataFrame` returned incorrectly (:issue:`28243`, :issue:`15692`, :issue:`17257`) - .. 
--------------------------------------------------------------------------- From ced3c5bf0bfab4420a1f85113f8c551db7d23d42 Mon Sep 17 00:00:00 2001 From: debnathshoham Date: Mon, 31 Oct 2022 15:11:24 +0530 Subject: [PATCH 20/20] whatsnew to 2.0.0 --- doc/source/whatsnew/v1.5.2.rst | 2 +- doc/source/whatsnew/v2.0.0.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.2.rst b/doc/source/whatsnew/v1.5.2.rst index 868cf9ce0d4ed..aaf00804262bb 100644 --- a/doc/source/whatsnew/v1.5.2.rst +++ b/doc/source/whatsnew/v1.5.2.rst @@ -21,7 +21,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- Bug in :meth:`DataFrame.merge` which also affected :meth:`DataFrame.join`, when joining over index on one :class:`DataFrame` and column on the other :class:`DataFrame` returned incorrectly (:issue:`28243`, :issue:`15692`, :issue:`17257`) +- - .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 5614b7a2c0846..c60ce7190939f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -425,6 +425,7 @@ Reshaping - Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` unstacking wrong level of :class:`MultiIndex` when :class:`MultiIndex` has mixed names (:issue:`48763`) - Bug in :meth:`DataFrame.pivot` not respecting ``None`` as column name (:issue:`48293`) - Bug in :func:`join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`) +- Bug in :meth:`DataFrame.merge`, which also affected :meth:`DataFrame.join`, returning incorrect results when joining on the index of one :class:`DataFrame` and a column of the other :class:`DataFrame` (:issue:`28243`, :issue:`15692`, :issue:`17257`) - Sparse