Skip to content

Commit 0785383

Browse files
committed
When calling set_index with the parameter inplace=True, the function attempted to create a new index with a fixed-length string (FLS) dtype of S{value}.
This dtype was not recognized by the function _dtype_to_subclass, which raised a NotImplementedError. To address this, I added a check to the function asarray_tuplesafe that converts the data to an array with object dtype, allowing the index to be created successfully. Additionally, I created a new test and simplified a previously created test. I also reverted the test file test_parquet.py to restore the intended FLS behavior.
1 parent ff0b740 commit 0785383

File tree

6 files changed

+22
-14
lines changed

6 files changed

+22
-14
lines changed

doc/source/whatsnew/v3.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ Performance improvements
378378

379379
Bug fixes
380380
~~~~~~~~~
381-
- Fixed bug in :class:`Index` Index constructor did not allow FLS as indices. (:issue:`57645`)
381+
- Fixed bug in :class:`Index` where the constructor was not converting FLS (fixed-length string) data to object dtype. (:issue:`57645`)
382382
- Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
383383
- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
384384
- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)

pandas/core/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLi
236236
values = list(values)
237237
elif isinstance(values, ABCIndex):
238238
return values._values
239-
elif isinstance(values, ABCSeries):
239+
elif isinstance(values, ABCSeries) and values.dtype != "S":
240240
return values._values
241241

242242
if isinstance(values, list) and dtype in [np.object_, object]:

pandas/core/indexes/base.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -625,9 +625,7 @@ def _dtype_to_subclass(cls, dtype: DtypeObj):
625625
# NB: assuming away MultiIndex
626626
return Index
627627

628-
elif (
629-
dtype.kind == "S" or issubclass(dtype.type, str) or is_numeric_dtype(dtype)
630-
):
628+
elif issubclass(dtype.type, str) or is_numeric_dtype(dtype):
631629
return Index
632630

633631
raise NotImplementedError(dtype)

pandas/tests/frame/methods/test_set_index.py

+4-8
Original file line numberDiff line numberDiff line change
@@ -618,17 +618,13 @@ def test_set_index_raise_on_len(
618618
df.set_index(["A", df.A, box(values)], drop=drop, append=append)
619619

620620
def test_set_index_with_FLS_Dtype(self):
621-
string_length = 6
622-
in_dtype, df_name = f"S{string_length}", "fruit"
623-
data = ["apple", "banana", "orange", "grape"]
624-
625621
# Create array with FLS(|S{value}) dtype
626-
arr = np.array(data, dtype=in_dtype)
627-
df = DataFrame(Series(arr), columns=[df_name])
622+
arr = np.array(["apple", "banana", "orange", "grape"], dtype="S6")
623+
df = DataFrame(Series(arr), columns=["fruits"])
628624

629625
# This will create a new Index with FLS dtype
630-
expected = Index(data=Series(arr), name=df_name)
631-
df.set_index(df_name, inplace=True)
626+
expected = Index(data=Series(arr), name="fruits")
627+
df.set_index("fruits", inplace=True)
632628
tm.assert_index_equal(df.index, expected)
633629

634630

pandas/tests/indexes/test_index_new.py

+12
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,18 @@ def test_constructor_datetimes_mixed_tzs(self):
185185
expected = Index([dt1, dt2], dtype=object)
186186
tm.assert_index_equal(result, expected)
187187

188+
def test_FLS_to_object_conversion(self):
189+
# Create NumPy array of fixed-length strings
190+
arr = np.array(["apple", "banana", "orange", "grape"], dtype="S6")
191+
# Create expected array for index
192+
expected_arr = np.array(
193+
[b"apple", b"banana", b"orange", b"grape"], dtype=object
194+
)
195+
# The Index constructor should convert the FLS data to object dtype
196+
index = Index(data=Series(arr), name="fruits")
197+
expected = Index(data=Series(expected_arr), name="fruits")
198+
tm.assert_index_equal(index, expected)
199+
188200

189201
class TestDtypeEnforced:
190202
# check we don't silently ignore the dtype keyword

pandas/tests/io/test_parquet.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1027,7 +1027,9 @@ def test_columns_dtypes_not_invalid(self, pa):
10271027

10281028
# bytes
10291029
df.columns = [b"foo", b"bar"]
1030-
check_round_trip(df, pa)
1030+
with pytest.raises(NotImplementedError, match="|S3"):
1031+
# Bytes fails on read_parquet
1032+
check_round_trip(df, pa)
10311033

10321034
# python object
10331035
df.columns = [

0 commit comments

Comments
 (0)