From 03450d9a8ea111789e8aa76cbcaa91316f827b11 Mon Sep 17 00:00:00 2001 From: Kasim Date: Tue, 29 Dec 2020 04:57:01 +0000 Subject: [PATCH 1/3] BUG: Added test cases to check loc on a multiindex with nan values #29751 --- .../tests/indexing/multiindex/test_getitem.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 144df1e28f8b6..c190a91c74195 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -1,6 +1,7 @@ import numpy as np import pytest +import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series import pandas._testing as tm from pandas.core.indexing import IndexingError @@ -279,3 +280,32 @@ def test_loc_empty_multiindex(): result = df expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"]) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("nan", [np.nan, pd.NA, None]) +def test_loc_nan_multiindex(nan): + # GH#29751 + # tests to check loc on a multiindex containing nan values + arr = [ + [11, nan, 13], + [21, nan, 23], + [31, nan, 33], + [41, nan, 43], + [51, nan, 53], + ] + cols = ["a", "b", "c"] + df = DataFrame(arr, columns=cols, dtype="int").set_index(["a", "b"]) + idx = df.index[1] + result = df.loc[:idx, :] + expected = DataFrame(arr[:2], columns=cols, dtype="int").set_index(["a", "b"]) + tm.assert_frame_equal(result, expected) + + result = df.loc[idx:, :] + expected = DataFrame(arr[1:], columns=cols).set_index(["a", "b"]) + tm.assert_frame_equal(result, expected) + + idx1 = df.index[1] + idx2 = df.index[3] + result = df.loc[idx1:idx2, :] + expected = DataFrame(arr[1:4], columns=cols).set_index(["a", "b"]) + tm.assert_frame_equal(result, expected) From 988ff303addeeec1d5b1453971e2255cadfa0dd9 Mon Sep 17 00:00:00 2001 From: Kasim Date: Mon, 4 Jan 2021 09:05:46 +0000 Subject: [PATCH 2/3] BUG: Added nulls_fixture, parametrized slices, and added fixed dtype in test #29751 --- .../tests/indexing/multiindex/test_getitem.py | 38 ++++++++----------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index c190a91c74195..9b877d769cda1 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -1,7 +1,6 @@ import numpy as np import pytest -import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series import pandas._testing as tm from pandas.core.indexing import IndexingError @@ -282,30 +281,23 @@ def test_loc_empty_multiindex(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("nan", [np.nan, pd.NA, None]) -def test_loc_nan_multiindex(nan): +@pytest.mark.parametrize("idx1,idx2", [(0, 1), (1, 4), (1, 3)]) +def test_loc_nan_multiindex(nulls_fixture, idx1, idx2): # GH#29751 - # tests to check loc on a multiindex containing nan values + # loc on a multiindex containing nan values arr = [ - [11, nan, 13], - [21, nan, 23], - [31, nan, 33], - [41, nan, 43], - [51, nan, 53], + [11, nulls_fixture, 13], + [21, nulls_fixture, 23], + [31, nulls_fixture, 33], + [41, nulls_fixture, 43], + [51, nulls_fixture, 53], ] cols = ["a", "b", "c"] - df = DataFrame(arr, columns=cols, dtype="int").set_index(["a", "b"]) - idx = df.index[1] - result = df.loc[:idx, :] - expected = DataFrame(arr[:2], columns=cols, dtype="int").set_index(["a", "b"]) - tm.assert_frame_equal(result, expected) - - result = df.loc[idx:, :] - expected = DataFrame(arr[1:], columns=cols).set_index(["a", "b"]) - tm.assert_frame_equal(result, expected) - - idx1 = df.index[1] - idx2 = df.index[3] - result = df.loc[idx1:idx2, :] - expected = DataFrame(arr[1:4], columns=cols).set_index(["a", "b"]) + df = DataFrame(arr, columns=cols, dtype="int64").set_index(["a", "b"]) + start = df.index[idx1] + end = df.index[idx2] + result = df.loc[start:end, :] + expected = DataFrame(arr[idx1 : (idx2 + 1)], columns=cols, dtype="int64").set_index( + ["a", "b"] + ) tm.assert_frame_equal(result, expected) From 98d70f056f29d2e7953acb2bea1b7097a2b615c5 Mon Sep 17 00:00:00 2001 From: Kasim Date: Thu, 7 Jan 2021 02:31:55 +0000 Subject: [PATCH 3/3] BUG: Added test cases from original post, hardcoded expected values #29751 --- .../tests/indexing/multiindex/test_getitem.py | 54 +++++++++++-------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 9b877d769cda1..6c0d1c285acf3 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -197,6 +197,38 @@ def test_frame_mixed_depth_get(): tm.assert_series_equal(result, expected) +def test_frame_getitem_nan_multiindex(nulls_fixture): + # GH#29751 + # loc on a multiindex containing nan values + n = nulls_fixture # for code readability + cols = ["a", "b", "c"] + df = DataFrame( + [[11, n, 13], [21, n, 23], [31, n, 33], [41, n, 43]], + columns=cols, + dtype="int64", + ).set_index(["a", "b"]) + + idx = (21, n) + result = df.loc[:idx] + expected = DataFrame( + [[11, n, 13], [21, n, 23]], columns=cols, dtype="int64" + ).set_index(["a", "b"]) + tm.assert_frame_equal(result, expected) + + result = df.loc[idx:] + expected = DataFrame( + [[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols, dtype="int64" + ).set_index(["a", "b"]) + tm.assert_frame_equal(result, expected) + + idx1, idx2 = (21, n), (31, n) + result = df.loc[idx1:idx2] + expected = DataFrame( + [[21, n, 23], [31, n, 33]], columns=cols, dtype="int64" + ).set_index(["a", "b"]) + tm.assert_frame_equal(result, expected) + + # ---------------------------------------------------------------------------- # test indexing of DataFrame with multi-level Index with duplicates # ---------------------------------------------------------------------------- @@ -279,25 +311,3 @@ def test_loc_empty_multiindex(): result = df expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"]) tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("idx1,idx2", [(0, 1), (1, 4), (1, 3)]) -def test_loc_nan_multiindex(nulls_fixture, idx1, idx2): - # GH#29751 - # loc on a multiindex containing nan values - arr = [ - [11, nulls_fixture, 13], - [21, nulls_fixture, 23], - [31, nulls_fixture, 33], - [41, nulls_fixture, 43], - [51, nulls_fixture, 53], - ] - cols = ["a", "b", "c"] - df = DataFrame(arr, columns=cols, dtype="int64").set_index(["a", "b"]) - start = df.index[idx1] - end = df.index[idx2] - result = df.loc[start:end, :] - expected = DataFrame(arr[idx1 : (idx2 + 1)], columns=cols, dtype="int64").set_index( - ["a", "b"] - ) - tm.assert_frame_equal(result, expected)