Skip to content

Commit 639bd66

Browse files
authored
BUG: setitem casting object Index to arrow strings (#55639)
1 parent e8d9a32 commit 639bd66

File tree

5 files changed

+33
-2
lines changed

5 files changed

+33
-2
lines changed

doc/source/whatsnew/v2.1.4.rst

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ Fixed regressions
2222
Bug fixes
2323
~~~~~~~~~
2424
- Bug in :class:`Series` constructor raising DeprecationWarning when ``index`` is a list of :class:`Series` (:issue:`55228`)
25+
- Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow-backed strings when ``infer_string`` option is set (:issue:`55638`)
26+
- Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow-backed strings when ``infer_string`` option is set (:issue:`55638`)
2527
-
2628

2729
.. ---------------------------------------------------------------------------

pandas/core/construction.py

+12
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
from pandas.core.dtypes.common import (
5050
is_list_like,
5151
is_object_dtype,
52+
is_string_dtype,
5253
pandas_dtype,
5354
)
5455
from pandas.core.dtypes.dtypes import NumpyEADtype
@@ -548,6 +549,10 @@ def sanitize_array(
548549
# Avoid ending up with a NumpyExtensionArray
549550
dtype = dtype.numpy_dtype
550551

552+
object_index = False
553+
if isinstance(data, ABCIndex) and data.dtype == object and dtype is None:
554+
object_index = True
555+
551556
# extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray
552557
data = extract_array(data, extract_numpy=True, extract_range=True)
553558

@@ -601,6 +606,13 @@ def sanitize_array(
601606
subarr = data
602607
if data.dtype == object:
603608
subarr = maybe_infer_to_datetimelike(data)
609+
if (
610+
object_index
611+
and using_pyarrow_string_dtype()
612+
and is_string_dtype(subarr)
613+
):
614+
# Avoid inference when string option is set
615+
subarr = data
604616
elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
605617
from pandas.core.arrays.string_ import StringDtype
606618

pandas/core/indexes/base.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from pandas._config import (
2424
get_option,
2525
using_copy_on_write,
26+
using_pyarrow_string_dtype,
2627
)
2728

2829
from pandas._libs import (
@@ -6919,7 +6920,14 @@ def insert(self, loc: int, item) -> Index:
69196920
loc = loc if loc >= 0 else loc - 1
69206921
new_values[loc] = item
69216922

6922-
return Index._with_infer(new_values, name=self.name)
6923+
idx = Index._with_infer(new_values, name=self.name)
6924+
if (
6925+
using_pyarrow_string_dtype()
6926+
and is_string_dtype(idx.dtype)
6927+
and new_values.dtype == object
6928+
):
6929+
idx = idx.astype(new_values.dtype)
6930+
return idx
69236931

69246932
def drop(
69256933
self,

pandas/tests/frame/indexing/test_indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1930,7 +1930,7 @@ def test_add_new_column_infer_string():
19301930
df.loc[df["x"] == 1, "y"] = "1"
19311931
expected = DataFrame(
19321932
{"x": [1], "y": Series(["1"], dtype="string[pyarrow_numpy]")},
1933-
columns=Index(["x", "y"], dtype="string[pyarrow_numpy]"),
1933+
columns=Index(["x", "y"], dtype=object),
19341934
)
19351935
tm.assert_frame_equal(df, expected)
19361936

pandas/tests/frame/indexing/test_setitem.py

+9
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,15 @@ def test_setitem_frame_overwrite_with_ea_dtype(self, any_numeric_ea_dtype):
753753
)
754754
tm.assert_frame_equal(df, expected)
755755

756+
def test_setitem_string_option_object_index(self):
757+
# GH#55638
758+
pytest.importorskip("pyarrow")
759+
df = DataFrame({"a": [1, 2]})
760+
with pd.option_context("future.infer_string", True):
761+
df["b"] = Index(["a", "b"], dtype=object)
762+
expected = DataFrame({"a": [1, 2], "b": Series(["a", "b"], dtype=object)})
763+
tm.assert_frame_equal(df, expected)
764+
756765
def test_setitem_frame_midx_columns(self):
757766
# GH#49121
758767
df = DataFrame({("a", "b"): [10]})

0 commit comments

Comments
 (0)