From 94099737ddf8b24329755223115668152d68f62f Mon Sep 17 00:00:00 2001
From: Pedro Freitas <pedrogmfreitas@tecnico.ulisboa.pt>
Date: Mon, 25 Mar 2024 17:36:38 +0000
Subject: [PATCH 1/3] FIX #57645: Cannot use numpy FLS as indices since pandas
 2.2.1

When set_index was called with inplace=True, it tried to create a new
index whose dtype was a NumPy fixed-length string (FLS) dtype, S{value}.
That dtype was not recognized by _dtype_to_subclass, which raised a
NotImplementedError. Adding a check that recognizes the FLS dtype
(dtype.kind == "S") lets the index be created successfully, so the
function executes properly.
---
 doc/source/whatsnew/v3.0.0.rst               |  9 +++++++++
 pandas/core/indexes/base.py                  |  4 +++-
 pandas/tests/frame/methods/test_set_index.py | 14 ++++++++++++++
 pandas/tests/io/test_parquet.py              |  4 +---
 4 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 7da6d43c732a9..d7aafcd192374 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -378,6 +378,15 @@ Performance improvements
 
 Bug fixes
 ~~~~~~~~~
+- Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
+- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
+- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
+- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
+- Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
+- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
+- Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
+- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
+- Fixed bug in :class:`Index` Index constructor did not allow FLS as indicies. (:issue:`57645`) 
 
 Categorical
 ^^^^^^^^^^^
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index ebbd85be44009..d32a6393c8edf 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -625,7 +625,9 @@ def _dtype_to_subclass(cls, dtype: DtypeObj):
             # NB: assuming away MultiIndex
             return Index
 
-        elif issubclass(dtype.type, str) or is_numeric_dtype(dtype):
+        elif (
+            dtype.kind == "S" or issubclass(dtype.type, str) or is_numeric_dtype(dtype)
+        ):
             return Index
 
         raise NotImplementedError(dtype)
diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py
index 198cab0e91eab..be8a2dfa20704 100644
--- a/pandas/tests/frame/methods/test_set_index.py
+++ b/pandas/tests/frame/methods/test_set_index.py
@@ -617,6 +617,20 @@ def test_set_index_raise_on_len(
         with pytest.raises(ValueError, match=msg):
             df.set_index(["A", df.A, box(values)], drop=drop, append=append)
 
+    def test_set_index_with_FLS_Dtype(self):
+        string_length = 6
+        in_dtype, df_name = f"S{string_length}", "fruit"
+        data = ["apple", "banana", "orange", "grape"]
+
+        # Create array with FLS(|S{value}) dtype
+        arr = np.array(data, dtype=in_dtype)
+        df = DataFrame(Series(arr), columns=[df_name])
+
+        # This will create a new Index with FLS dtype
+        expected = Index(data=Series(arr), name=df_name)
+        df.set_index(df_name, inplace=True)
+        tm.assert_index_equal(df.index, expected)
+
 
 class TestSetIndexCustomLabelType:
     def test_set_index_custom_label_type(self):
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 2860b3a6483af..398561f22d2cb 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1027,9 +1027,7 @@ def test_columns_dtypes_not_invalid(self, pa):
 
         # bytes
         df.columns = [b"foo", b"bar"]
-        with pytest.raises(NotImplementedError, match="|S3"):
-            # Bytes fails on read_parquet
-            check_round_trip(df, pa)
+        check_round_trip(df, pa)
 
         # python object
         df.columns = [

From ff0b740f0139da67c51595ac4cbe6c296449caf8 Mon Sep 17 00:00:00 2001
From: Pedro Freitas <pedrogmfreitas@tecnico.ulisboa.pt>
Date: Mon, 25 Mar 2024 22:00:27 +0000
Subject: [PATCH 2/3] sort whatsnew entries alphabetically

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index d7aafcd192374..4b41f9a6b9d69 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -378,6 +378,7 @@ Performance improvements
 
 Bug fixes
 ~~~~~~~~~
+- Fixed bug in :class:`Index` Index constructor did not allow FLS as indices. (:issue:`57645`)
 - Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
 - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
 - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
@@ -386,7 +387,6 @@ Bug fixes
 - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
 - Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
 - Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
-- Fixed bug in :class:`Index` Index constructor did not allow FLS as indicies. (:issue:`57645`) 
 
 Categorical
 ^^^^^^^^^^^

From aaa456367e41c007aa152600521ec9b28ceca8c1 Mon Sep 17 00:00:00 2001
From: Pedro Freitas <pedrogmfreitas@tecnico.ulisboa.pt>
Date: Fri, 17 May 2024 09:58:12 +0100
Subject: [PATCH 3/3] While using the function set_index with the parameter
 inplace=True, the function attempted to create a new index with a dtype of
 FLS S{value}. This dtype was not recognized by the function
 _dtype_to_subclass, which raised a NotImplementedError. To address this, I
 added a verification to the function asarray_tuplesafe that converts data to
 an array with object type, allowing the index to be created successfully.

Additionally, I added a new test and simplified a previously created test.
I also reverted the change to test_parquet.py, restoring its expectation
that round-tripping bytes column labels raises NotImplementedError.
---
 doc/source/whatsnew/v3.0.0.rst               |  2 +-
 pandas/core/common.py                        |  2 +-
 pandas/core/indexes/base.py                  |  4 +---
 pandas/tests/frame/methods/test_set_index.py | 15 ++++++---------
 pandas/tests/indexes/test_index_new.py       | 12 ++++++++++++
 pandas/tests/io/test_parquet.py              |  4 +++-
 6 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 4b41f9a6b9d69..f76e0d1458731 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -378,7 +378,7 @@ Performance improvements
 
 Bug fixes
 ~~~~~~~~~
-- Fixed bug in :class:`Index` Index constructor did not allow FLS as indices. (:issue:`57645`)
+- Fixed bug in :class:`Index` Index constructor was not converting FLS to object. (:issue:`57645`)
 - Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
 - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
 - Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 77e986a26fbe9..bf270efab8c5b 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -236,7 +236,7 @@ def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLi
         values = list(values)
     elif isinstance(values, ABCIndex):
         return values._values
-    elif isinstance(values, ABCSeries):
+    elif isinstance(values, ABCSeries) and values.dtype.kind != "S":
         return values._values
 
     if isinstance(values, list) and dtype in [np.object_, object]:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index d32a6393c8edf..ebbd85be44009 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -625,9 +625,7 @@ def _dtype_to_subclass(cls, dtype: DtypeObj):
             # NB: assuming away MultiIndex
             return Index
 
-        elif (
-            dtype.kind == "S" or issubclass(dtype.type, str) or is_numeric_dtype(dtype)
-        ):
+        elif issubclass(dtype.type, str) or is_numeric_dtype(dtype):
             return Index
 
         raise NotImplementedError(dtype)
diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py
index be8a2dfa20704..5dca06afe72d6 100644
--- a/pandas/tests/frame/methods/test_set_index.py
+++ b/pandas/tests/frame/methods/test_set_index.py
@@ -618,17 +618,14 @@ def test_set_index_raise_on_len(
             df.set_index(["A", df.A, box(values)], drop=drop, append=append)
 
     def test_set_index_with_FLS_Dtype(self):
-        string_length = 6
-        in_dtype, df_name = f"S{string_length}", "fruit"
-        data = ["apple", "banana", "orange", "grape"]
+        arr = np.array(["apple", "banana", "orange", "grape"], dtype="S6")
 
-        # Create array with FLS(|S{value}) dtype
-        arr = np.array(data, dtype=in_dtype)
-        df = DataFrame(Series(arr), columns=[df_name])
+        # Attempt to create a DataFrame with an array with FLS Dtype
+        df = DataFrame(Series(arr), columns=["fruits"])
 
-        # This will create a new Index with FLS dtype
-        expected = Index(data=Series(arr), name=df_name)
-        df.set_index(df_name, inplace=True)
+        # Create Index that converts FLS Dtype to object
+        expected = Index(data=Series(arr), name="fruits")
+        df.set_index("fruits", inplace=True)
         tm.assert_index_equal(df.index, expected)
 
 
diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py
index b544ebac43ece..073f99d938bcd 100644
--- a/pandas/tests/indexes/test_index_new.py
+++ b/pandas/tests/indexes/test_index_new.py
@@ -185,6 +185,18 @@ def test_constructor_datetimes_mixed_tzs(self):
         expected = Index([dt1, dt2], dtype=object)
         tm.assert_index_equal(result, expected)
 
+    def test_FLS_to_object_conversion(self):
+        # Create NumPy array of fixed-length strings
+        arr = np.array(["apple", "banana", "orange", "grape"], dtype="S6")
+        # Create expected array for index
+        expected_arr = np.array(
+            [b"apple", b"banana", b"orange", b"grape"], dtype=object
+        )
+        # Create Index that converts FLS Dtype to object
+        index = Index(data=Series(arr), name="fruits")
+        expected = Index(data=Series(expected_arr), name="fruits")
+        tm.assert_index_equal(index, expected)
+
 
 class TestDtypeEnforced:
     # check we don't silently ignore the dtype keyword
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 398561f22d2cb..2860b3a6483af 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1027,7 +1027,9 @@ def test_columns_dtypes_not_invalid(self, pa):
 
         # bytes
         df.columns = [b"foo", b"bar"]
-        check_round_trip(df, pa)
+        with pytest.raises(NotImplementedError, match="|S3"):
+            # Bytes fails on read_parquet
+            check_round_trip(df, pa)
 
         # python object
         df.columns = [