From 97d4358d3cc6dc05ecf33c1b5f27c25324ff4aaf Mon Sep 17 00:00:00 2001
From: Zhengfei Wang
Date: Wed, 9 Feb 2022 13:26:45 +0800
Subject: [PATCH 1/9] add test for DataFrame.join()

DataFrame.join() with duplicate indices should raise now and add test for this.
---
 pandas/tests/frame/methods/test_join.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py
index c6bfd94b84908..0b240fd03b19f 100644
--- a/pandas/tests/frame/methods/test_join.py
+++ b/pandas/tests/frame/methods/test_join.py
@@ -12,6 +12,7 @@
     period_range,
 )
 import pandas._testing as tm
+from pandas.errors import InvalidIndexError
 
 
 @pytest.fixture
@@ -369,3 +370,10 @@ def test_frame_join_tzaware(self):
 
         tm.assert_index_equal(result.index, expected)
         assert result.index.tz.zone == "US/Central"
+
+    def test_join_duplicate_indices(self):
+        # https://github.com/pandas-dev/pandas/issues/36263
+        df1 = DataFrame(np.random.randn(5), index=[0, 1, 2, 3, 3], columns=["a"])
+        df2 = DataFrame(np.random.randn(5), index=[0, 1, 2, 2, 4], columns=["b"])
+        with pytest.raises(InvalidIndexError):
+            df1.join(df2, how="outer")

From 2ddfc99def493d13a1a60d0ce5c609579e48f236 Mon Sep 17 00:00:00 2001
From: Zhengfei Wang
Date: Wed, 9 Feb 2022 14:02:35 +0800
Subject: [PATCH 2/9] Update test_join.py

catch concat's raise, not join
---
 pandas/tests/frame/methods/test_join.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py
index 0b240fd03b19f..7b810fbea8b64 100644
--- a/pandas/tests/frame/methods/test_join.py
+++ b/pandas/tests/frame/methods/test_join.py
@@ -371,9 +371,9 @@ def test_frame_join_tzaware(self):
         tm.assert_index_equal(result.index, expected)
         assert result.index.tz.zone == "US/Central"
 
-    def test_join_duplicate_indices(self):
+    def test_duplicate_indices_concat_raise(self):
         # https://github.com/pandas-dev/pandas/issues/36263
         df1 = DataFrame(np.random.randn(5), index=[0, 1, 2, 3, 3], columns=["a"])
         df2 = DataFrame(np.random.randn(5), index=[0, 1, 2, 2, 4], columns=["b"])
         with pytest.raises(InvalidIndexError):
-            df1.join(df2, how="outer")
+            pd.concat([df1, df2], axis=1)

From 9381eb824d18076e9d5b98014a61681fb88d56dd Mon Sep 17 00:00:00 2001
From: Zhengfei Wang
Date: Wed, 9 Feb 2022 06:22:41 +0000
Subject: [PATCH 3/9] Fixes from pre-commit [automated commit]

---
 pandas/tests/frame/methods/test_join.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py
index 7b810fbea8b64..07fbe2a7ec27a 100644
--- a/pandas/tests/frame/methods/test_join.py
+++ b/pandas/tests/frame/methods/test_join.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas.errors import InvalidIndexError
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -12,7 +14,6 @@
     period_range,
 )
 import pandas._testing as tm
-from pandas.errors import InvalidIndexError
 
 
 @pytest.fixture

From 1037cb9dcbb6d762b98ca4d309c7aef4f0c36461 Mon Sep 17 00:00:00 2001
From: Zhengfei Wang
Date: Thu, 10 Feb 2022 10:25:00 +0800
Subject: [PATCH 4/9] Update test_join.py

add msg for pytest.raises for Code Check
---
 pandas/tests/frame/methods/test_join.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py
index 7b810fbea8b64..54cdcf2cb9e81 100644
--- a/pandas/tests/frame/methods/test_join.py
+++ b/pandas/tests/frame/methods/test_join.py
@@ -375,5 +375,6 @@ def test_duplicate_indices_concat_raise(self):
         # https://github.com/pandas-dev/pandas/issues/36263
         df1 = DataFrame(np.random.randn(5), index=[0, 1, 2, 3, 3], columns=["a"])
         df2 = DataFrame(np.random.randn(5), index=[0, 1, 2, 2, 4], columns=["b"])
-        with pytest.raises(InvalidIndexError):
+        msg = "Reindexing only valid with uniquely valued Index objects"
+        with pytest.raises(InvalidIndexError, match=msg):
             pd.concat([df1, df2], axis=1)

From 89e919d785628e51eead3f88f5c3453020ea7370 Mon Sep 17 00:00:00 2001
From: Zhengfei Wang
Date: Fri, 11 Feb 2022 10:18:57 +0800
Subject: [PATCH 5/9] move new test to concat

---
 pandas/tests/frame/methods/test_join.py    | 10 ----------
 pandas/tests/reshape/concat/test_concat.py | 10 +++++++++-
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py
index 36c3a31f3875d..c6bfd94b84908 100644
--- a/pandas/tests/frame/methods/test_join.py
+++ b/pandas/tests/frame/methods/test_join.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas.errors import InvalidIndexError
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -371,11 +369,3 @@ def test_frame_join_tzaware(self):
 
         tm.assert_index_equal(result.index, expected)
         assert result.index.tz.zone == "US/Central"
-
-    def test_duplicate_indices_concat_raise(self):
-        # https://github.com/pandas-dev/pandas/issues/36263
-        df1 = DataFrame(np.random.randn(5), index=[0, 1, 2, 3, 3], columns=["a"])
-        df2 = DataFrame(np.random.randn(5), index=[0, 1, 2, 2, 4], columns=["b"])
-        msg = "Reindexing only valid with uniquely valued Index objects"
-        with pytest.raises(InvalidIndexError, match=msg):
-            pd.concat([df1, df2], axis=1)
diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
index cafb119f0c4ba..58b1371cf16ca 100644
--- a/pandas/tests/reshape/concat/test_concat.py
+++ b/pandas/tests/reshape/concat/test_concat.py
@@ -11,7 +11,7 @@
 import numpy as np
 import pytest
 
-from pandas.errors import PerformanceWarning
+from pandas.errors import InvalidIndexError, PerformanceWarning
 
 import pandas as pd
 from pandas import (
@@ -489,6 +489,14 @@ def test_concat_ordered_dict(self):
         )
         result = concat({"First": Series(range(3)), "Another": Series(range(4))})
         tm.assert_series_equal(result, expected)
+    
+    def test_concat_duplicate_indices_raise(self):
+        # https://github.com/pandas-dev/pandas/issues/36263
+        df1 = DataFrame(np.random.randn(5), index=[0, 1, 2, 3, 3], columns=["a"])
+        df2 = DataFrame(np.random.randn(5), index=[0, 1, 2, 2, 4], columns=["b"])
+        msg = "Reindexing only valid with uniquely valued Index objects"
+        with pytest.raises(InvalidIndexError, match=msg):
+            pd.concat([df1, df2], axis=1)
 
 
 @pytest.mark.parametrize("pdt", [Series, DataFrame])

From 9c132f05e44ebd7e1478386c3e01f6164d03f364 Mon Sep 17 00:00:00 2001
From: Zhengfei Wang
Date: Fri, 11 Feb 2022 10:34:38 +0800
Subject: [PATCH 6/9] Update test_concat.py

remove whitespace in blank line
---
 pandas/tests/reshape/concat/test_concat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
index 58b1371cf16ca..aac1cfd0d03b0 100644
--- a/pandas/tests/reshape/concat/test_concat.py
+++ b/pandas/tests/reshape/concat/test_concat.py
@@ -489,7 +489,7 @@ def test_concat_ordered_dict(self):
         )
         result = concat({"First": Series(range(3)), "Another": Series(range(4))})
         tm.assert_series_equal(result, expected)
-    
+
     def test_concat_duplicate_indices_raise(self):
         # https://github.com/pandas-dev/pandas/issues/36263
         df1 = DataFrame(np.random.randn(5), index=[0, 1, 2, 3, 3], columns=["a"])

From f43dc7fad62a4889bd05e49ec282b09d52316ea9 Mon Sep 17 00:00:00 2001
From: Zhengfei Wang
Date: Fri, 11 Feb 2022 04:03:31 +0000
Subject: [PATCH 7/9] Fixes from pre-commit [automated commit]

---
 pandas/tests/reshape/concat/test_concat.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
index aac1cfd0d03b0..0223ab14b0c38 100644
--- a/pandas/tests/reshape/concat/test_concat.py
+++ b/pandas/tests/reshape/concat/test_concat.py
@@ -11,7 +11,10 @@
 import numpy as np
 import pytest
 
-from pandas.errors import InvalidIndexError, PerformanceWarning
+from pandas.errors import (
+    InvalidIndexError,
+    PerformanceWarning,
+)
 
 import pandas as pd
 from pandas import (

From 64ee301c9045a6febd40dc7bae3a67ea51a63a32 Mon Sep 17 00:00:00 2001
From: Zhengfei Wang
Date: Fri, 11 Feb 2022 12:09:13 +0800
Subject: [PATCH 8/9] Update test_concat.py

add detailed description for test, also tend to trigger all checks
---
 pandas/tests/reshape/concat/test_concat.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
index aac1cfd0d03b0..389d7fa911d2f 100644
--- a/pandas/tests/reshape/concat/test_concat.py
+++ b/pandas/tests/reshape/concat/test_concat.py
@@ -491,6 +491,7 @@ def test_concat_ordered_dict(self):
         tm.assert_series_equal(result, expected)
 
     def test_concat_duplicate_indices_raise(self):
+        # GH 45888: test raise for concat DataFrames with duplicate indices
         # https://github.com/pandas-dev/pandas/issues/36263
         df1 = DataFrame(np.random.randn(5), index=[0, 1, 2, 3, 3], columns=["a"])
         df2 = DataFrame(np.random.randn(5), index=[0, 1, 2, 2, 4], columns=["b"])

From 05abb0cd98a54ebf6dd2c6117b3f645fec154e15 Mon Sep 17 00:00:00 2001
From: Zhengfei Wang
Date: Fri, 11 Feb 2022 12:51:24 +0800
Subject: [PATCH 9/9] Update test_concat.py

use concat directly instead of pd.concat for flake8
---
 pandas/tests/reshape/concat/test_concat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
index f1a8d46362049..eb44b4889afb8 100644
--- a/pandas/tests/reshape/concat/test_concat.py
+++ b/pandas/tests/reshape/concat/test_concat.py
@@ -500,7 +500,7 @@ def test_concat_duplicate_indices_raise(self):
         df2 = DataFrame(np.random.randn(5), index=[0, 1, 2, 2, 4], columns=["b"])
         msg = "Reindexing only valid with uniquely valued Index objects"
         with pytest.raises(InvalidIndexError, match=msg):
-            pd.concat([df1, df2], axis=1)
+            concat([df1, df2], axis=1)
 
 
 @pytest.mark.parametrize("pdt", [Series, DataFrame])