From 83cfd98bf641521a2e6e87fd7496721f147bb4fb Mon Sep 17 00:00:00 2001 From: Gabriel Simonetto <42247511+GabrielSimonetto@users.noreply.github.com> Date: Wed, 14 Oct 2020 20:31:37 -0300 Subject: [PATCH 1/7] Add test_masking_duplicate_columns --- pandas/tests/frame/test_nonunique_indexes.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index a8b76f4d85f49..f213fedac30e2 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -513,3 +513,9 @@ def test_set_value_by_index(self): df.iloc[:, 0] = 3 tm.assert_series_equal(df.iloc[:, 1], expected) + + def test_masking_duplicate_columns(self): + # https://github.com/pandas-dev/pandas/issues/31954 + df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=[0, 0]) + expected = DataFrame([[np.nan, np.nan], [np.nan, 3.0]], columns=[0, 0]) + tm.assert_frame_equal(df[df > 2], expected) From 16e47c3a7b53bdbea056644fbbaec29363b11b30 Mon Sep 17 00:00:00 2001 From: Gabriel Simonetto Date: Fri, 16 Oct 2020 17:22:33 -0300 Subject: [PATCH 2/7] Update test to use mixed dtypes --- pandas/tests/frame/test_nonunique_indexes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index f213fedac30e2..2c411e40ef0ea 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -514,8 +514,8 @@ def test_set_value_by_index(self): df.iloc[:, 0] = 3 tm.assert_series_equal(df.iloc[:, 1], expected) - def test_masking_duplicate_columns(self): + def test_masking_duplicate_columns_mixed_dtypes(self): # https://github.com/pandas-dev/pandas/issues/31954 - df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=[0, 0]) + df = pd.DataFrame([[0.0, 1], [2.0, 3]], columns=[0, 0]) expected = DataFrame([[np.nan, np.nan], [np.nan, 3.0]], columns=[0, 0]) tm.assert_frame_equal(df[df > 2], expected) From e6ab56681de9aa9d4560cf1177031266582f64b5 Mon Sep 17 00:00:00 2001 From: Gabriel Simonetto Date: Fri, 16 Oct 2020 20:23:35 -0300 Subject: [PATCH 3/7] Requested Changes --- pandas/tests/frame/test_nonunique_indexes.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 2c411e40ef0ea..68d219327a5d3 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -516,6 +516,7 @@ def test_set_value_by_index(self): def test_masking_duplicate_columns_mixed_dtypes(self): # https://github.com/pandas-dev/pandas/issues/31954 - df = pd.DataFrame([[0.0, 1], [2.0, 3]], columns=[0, 0]) + df = DataFrame([[0.0, 1], [2.0, 3]], columns=[0, 0]) expected = DataFrame([[np.nan, np.nan], [np.nan, 3.0]], columns=[0, 0]) - tm.assert_frame_equal(df[df > 2], expected) + result = df[df > 2] + tm.assert_frame_equal(result, expected) From fc746a413ba68be6926fce8c91e7e72cc08fcc1c Mon Sep 17 00:00:00 2001 From: Gabriel Simonetto Date: Sun, 18 Oct 2020 14:46:01 -0300 Subject: [PATCH 4/7] Add np constructor to test --- pandas/tests/frame/test_nonunique_indexes.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 68d219327a5d3..3c6686719ab2d 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -516,7 +516,7 @@ def test_set_value_by_index(self): def test_masking_duplicate_columns_mixed_dtypes(self): # https://github.com/pandas-dev/pandas/issues/31954 - df = DataFrame([[0.0, 1], [2.0, 3]], columns=[0, 0]) - expected = DataFrame([[np.nan, np.nan], [np.nan, 3.0]], columns=[0, 0]) + df = DataFrame(np.array([[0.0, 1], [2.0, 3]]), columns=[0, 0]) + expected = DataFrame(np.array([[np.nan, np.nan], [np.nan, 3.0]]), columns=[0, 0]) result = df[df > 2] - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) \ No newline at end of file From b62fe2e04b863e9453375be4c7c839d5350c0867 Mon Sep 17 00:00:00 2001 From: Gabriel Simonetto Date: Sun, 18 Oct 2020 15:13:51 -0300 Subject: [PATCH 5/7] Black changes --- pandas/tests/frame/test_nonunique_indexes.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 3c6686719ab2d..68d0e7cffe7a1 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -517,6 +517,8 @@ def test_set_value_by_index(self): def test_masking_duplicate_columns_mixed_dtypes(self): # https://github.com/pandas-dev/pandas/issues/31954 df = DataFrame(np.array([[0.0, 1], [2.0, 3]]), columns=[0, 0]) - expected = DataFrame(np.array([[np.nan, np.nan], [np.nan, 3.0]]), columns=[0, 0]) + expected = DataFrame( + np.array([[np.nan, np.nan], [np.nan, 3.0]]), columns=[0, 0] + ) result = df[df > 2] - tm.assert_frame_equal(result, expected) \ No newline at end of file + tm.assert_frame_equal(result, expected) From d637b1b871f512ea5133abc0511864c6bfab13c9 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 7 Dec 2020 23:54:39 -0500 Subject: [PATCH 6/7] review comments: exact OP --- pandas/tests/frame/test_nonunique_indexes.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 8eaee9bf7dfca..0194ba5f4d896 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -488,10 +488,15 @@ def test_set_value_by_index(self): tm.assert_series_equal(df.iloc[:, 1], expected) def test_masking_duplicate_columns_mixed_dtypes(self): - # https://github.com/pandas-dev/pandas/issues/31954 - df = DataFrame(np.array([[0.0, 1], [2.0, 3]]), columns=[0, 0]) + # GH31954 + + df1 = DataFrame(np.array([[1, 2], [3, 4]])) + df2 = DataFrame(np.array([[0.5, 6], [7, 8]])) + df = pd.concat([df1, df2], axis=1) + + result = df[df > 2] expected = DataFrame( - np.array([[np.nan, np.nan], [np.nan, 3.0]]), columns=[0, 0] + np.array([[np.nan, np.nan, np.nan, 6.0], [3.0, 4.0, 7.0, 8.0]]), + columns=pd.Int64Index([0, 1, 0, 1], dtype="int64"), ) - result = df[df > 2] tm.assert_frame_equal(result, expected) From 4819df4f58daabcf6eeb6ca9425c6b52698df758 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Tue, 8 Dec 2020 00:45:35 -0500 Subject: [PATCH 7/7] parametrize test --- pandas/tests/frame/test_nonunique_indexes.py | 31 ++++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 0194ba5f4d896..8dcf6f2188058 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -487,16 +487,35 @@ def test_set_value_by_index(self): df.iloc[:, 0] = 3 tm.assert_series_equal(df.iloc[:, 1], expected) - def test_masking_duplicate_columns_mixed_dtypes(self): + @pytest.mark.parametrize( + "data1,data2,expected_data", + ( + ( + [[1, 2], [3, 4]], + [[0.5, 6], [7, 8]], + [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]], + ), + ( + [[1, 2], [3, 4]], + [[5, 6], [7, 8]], + [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]], + ), + ), + ) + def test_masking_duplicate_columns_mixed_dtypes( + self, + data1, + data2, + expected_data, + ): # GH31954 - df1 = DataFrame(np.array([[1, 2], [3, 4]])) - df2 = DataFrame(np.array([[0.5, 6], [7, 8]])) + df1 = DataFrame(np.array(data1)) + df2 = DataFrame(np.array(data2)) df = pd.concat([df1, df2], axis=1) result = df[df > 2] expected = DataFrame( - np.array([[np.nan, np.nan, np.nan, 6.0], [3.0, 4.0, 7.0, 8.0]]), - columns=pd.Int64Index([0, 1, 0, 1], dtype="int64"), - ) + {i: np.array(col) for i, col in enumerate(expected_data)} + ).rename(columns={2: 0, 3: 1}) tm.assert_frame_equal(result, expected)