Skip to content

Backport PR #56013 on branch 2.3.x (BUG: get_indexer round-tripping through string dtype) #60339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ Interval

Indexing
^^^^^^^^
-
- Fixed bug in :meth:`Index.get_indexer` round-tripping through string dtype when ``infer_string`` is enabled (:issue:`55834`)
-

Missing
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6695,7 +6695,16 @@ def _maybe_cast_listlike_indexer(self, target) -> Index:
"""
Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
"""
return ensure_index(target)
target_index = ensure_index(target)
if (
not hasattr(target, "dtype")
and self.dtype == object
and target_index.dtype == "string"
):
# If we started with a list-like, avoid inference to string dtype if self
# is object dtype (coercing to string dtype will alter the missing values)
target_index = Index(target, dtype=self.dtype)
return target_index

@final
def _validate_indexer(
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/indexes/object/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,15 @@ def test_get_indexer_with_NA_values(
expected = np.array([0, 1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)

def test_get_indexer_infer_string_missing_values(self):
# Regression test: a plain list passed to get_indexer on an object-dtype
# Index must be cast to object, not to string dtype, so that the None
# value in the list is still matched against the None in the index.
# https://github.com/pandas-dev/pandas/issues/55834
idx = Index(["a", "b", None], dtype="object")
result = idx.get_indexer([None, "x"])
# None is at position 2 of idx; "x" is not in idx, hence -1.
expected = np.array([2, -1], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)


class TestGetIndexerNonUnique:
def test_get_indexer_non_unique_nas(self, nulls_fixture):
Expand Down
Loading