From f7c8d072921b053f2f98e76868cc3722ad78eccb Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 6 Sep 2022 13:20:07 +0200 Subject: [PATCH 1/4] REGR: get_loc for ExtensionEngine not returning bool indexer for na --- pandas/_libs/index.pyx | 2 +- pandas/tests/indexes/test_indexing.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 0cf7c4d45c634..617760c2981c4 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -1061,7 +1061,7 @@ cdef class ExtensionEngine(SharedEngine): cdef ndarray _get_bool_indexer(self, val): if checknull(val): - return self.values.isna().view("uint8") + return self.values.isna() try: return self.values == val diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py index 739039241a31d..67c273a5fb50f 100644 --- a/pandas/tests/indexes/test_indexing.py +++ b/pandas/tests/indexes/test_indexing.py @@ -20,6 +20,7 @@ from pandas.errors import InvalidIndexError from pandas import ( + NA, DatetimeIndex, Index, IntervalIndex, @@ -221,6 +222,13 @@ def test_get_loc_generator(self, index): # MultiIndex specifically checks for generator; others for scalar index.get_loc(x for x in range(5)) + def test_get_loc_masked_duplicated_na(self): + # GH# + idx = Index([1, 2, NA, NA], dtype="Int64") + result = idx.get_loc(NA) + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + class TestGetIndexer: def test_get_indexer_base(self, index): @@ -253,6 +261,13 @@ def test_get_indexer_consistency(self, index): assert isinstance(indexer, np.ndarray) assert indexer.dtype == np.intp + def test_get_indexer_masked_duplicated_na(self): + # GH# + idx = Index([1, 2, NA, NA], dtype="Int64") + result = idx.get_indexer_for(Index([1, NA], dtype="Int64")) + expected = np.array([0, 2, 3]) + tm.assert_numpy_array_equal(result, expected) + class TestConvertSliceIndexer: def test_convert_almost_null_slice(self, index): From 7a6ddb9f8412b5842d17cc17d4ac2335589cb4b8 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 6 Sep 2022 13:22:14 +0200 Subject: [PATCH 2/4] Add gh ref --- pandas/tests/indexes/test_indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py index 67c273a5fb50f..baca4b2e39187 100644 --- a/pandas/tests/indexes/test_indexing.py +++ b/pandas/tests/indexes/test_indexing.py @@ -223,7 +223,7 @@ def test_get_loc_generator(self, index): index.get_loc(x for x in range(5)) def test_get_loc_masked_duplicated_na(self): - # GH# + # GH#48411 idx = Index([1, 2, NA, NA], dtype="Int64") result = idx.get_loc(NA) expected = np.array([False, False, True, True]) @@ -262,7 +262,7 @@ def test_get_indexer_consistency(self, index): assert indexer.dtype == np.intp def test_get_indexer_masked_duplicated_na(self): - # GH# + # GH#48411 idx = Index([1, 2, NA, NA], dtype="Int64") result = idx.get_indexer_for(Index([1, NA], dtype="Int64")) expected = np.array([0, 2, 3]) From f1aaf6f0f42cae71375fe660acd70de050e16ce2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 6 Sep 2022 15:43:05 +0200 Subject: [PATCH 3/4] Fix dtype --- pandas/tests/indexes/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py index baca4b2e39187..72c80560e2fed 100644 --- a/pandas/tests/indexes/test_indexing.py +++ b/pandas/tests/indexes/test_indexing.py @@ -265,7 +265,7 @@ def test_get_indexer_masked_duplicated_na(self): # GH#48411 idx = Index([1, 2, NA, NA], dtype="Int64") result = idx.get_indexer_for(Index([1, NA], dtype="Int64")) - expected = np.array([0, 2, 3]) + expected = np.array([0, 2, 3], dtype="int64") tm.assert_numpy_array_equal(result, expected) From 3fb16dff4df92a908be05ccb88e4add59f0e45ef Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Tue, 6 Sep 2022 22:34:40 +0200 Subject: [PATCH 4/4] Fix dtype --- pandas/tests/indexes/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py index 72c80560e2fed..ab934df992d61 100644 --- a/pandas/tests/indexes/test_indexing.py +++ b/pandas/tests/indexes/test_indexing.py @@ -265,7 +265,7 @@ def test_get_indexer_masked_duplicated_na(self): # GH#48411 idx = Index([1, 2, NA, NA], dtype="Int64") result = idx.get_indexer_for(Index([1, NA], dtype="Int64")) - expected = np.array([0, 2, 3], dtype="int64") + expected = np.array([0, 2, 3], dtype=result.dtype) tm.assert_numpy_array_equal(result, expected)