From 1ba1486845eff5a9767c64859e82ed5e34f1947d Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Sun, 19 May 2024 18:11:50 -0400
Subject: [PATCH 1/6] PERF: stack on non-MultiIndex columns

---
 pandas/core/reshape/reshape.py | 156 ++++++++++++++++++++-------------
 1 file changed, 96 insertions(+), 60 deletions(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 01cc85ceff181..16196ef92e417 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -925,75 +925,28 @@ def _reorder_for_extension_array_stack(
 def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
     if frame.columns.nunique() != len(frame.columns):
         raise ValueError("Columns with duplicate values are not supported in stack")
-
-    # If we need to drop `level` from columns, it needs to be in descending order
     set_levels = set(level)
-    drop_levnums = sorted(level, reverse=True)
     stack_cols = frame.columns._drop_level_numbers(
         [k for k in range(frame.columns.nlevels - 1, -1, -1) if k not in set_levels]
     )
-    if len(level) > 1:
-        # Arrange columns in the order we want to take them, e.g. level=[2, 0, 1]
-        sorter = np.argsort(level)
-        assert isinstance(stack_cols, MultiIndex)
-        ordered_stack_cols = stack_cols._reorder_ilevels(sorter)
-    else:
-        ordered_stack_cols = stack_cols
-
-    stack_cols_unique = stack_cols.unique()
-    ordered_stack_cols_unique = ordered_stack_cols.unique()
-
-    # Grab data for each unique index to be stacked
-    buf = []
-    for idx in stack_cols_unique:
-        if len(frame.columns) == 1:
-            data = frame.copy()
-        else:
-            if not isinstance(frame.columns, MultiIndex) and not isinstance(idx, tuple):
-                # GH#57750 - if the frame is an Index with tuples, .loc below will fail
-                column_indexer = idx
-            else:
-                # Take the data from frame corresponding to this idx value
-                if len(level) == 1:
-                    idx = (idx,)
-                gen = iter(idx)
-                column_indexer = tuple(
-                    next(gen) if k in set_levels else slice(None)
-                    for k in range(frame.columns.nlevels)
-                )
-            data = frame.loc[:, column_indexer]
-
-        if len(level) < frame.columns.nlevels:
-            data.columns = data.columns._drop_level_numbers(drop_levnums)
-        elif stack_cols.nlevels == 1:
-            if data.ndim == 1:
-                data.name = 0
-            else:
-                data.columns = RangeIndex(len(data.columns))
-        buf.append(data)
 
     result: Series | DataFrame
-    if len(buf) > 0 and not frame.empty:
-        result = concat(buf, ignore_index=True)
-        ratio = len(result) // len(frame)
-    else:
-        # input is empty
-        if len(level) < frame.columns.nlevels:
-            # concat column order may be different from dropping the levels
-            new_columns = frame.columns._drop_level_numbers(drop_levnums).unique()
+    if not isinstance(frame.columns, MultiIndex):
+        # Fast path when we're stacking the columns of a non-MultiIndex.
+        # When columns are homogeneous EAs, we pass through object
+        # dtype but this is still faster than the normal path.
+        if len(frame.columns) > 0 and frame._is_homogeneous_type:
+            dtype = frame._mgr.arrays[0].dtype
         else:
-            new_columns = [0]
-        result = DataFrame(columns=new_columns, dtype=frame._values.dtype)
-        ratio = 0
-
-    if len(level) < frame.columns.nlevels:
-        # concat column order may be different from dropping the levels
-        desired_columns = frame.columns._drop_level_numbers(drop_levnums).unique()
-        if not result.columns.equals(desired_columns):
-            result = result[desired_columns]
+            dtype = None
+        result = Series(frame._values.ravel(order="F"), dtype=dtype)
+    else:
+        result = stack_reshape(frame, level, set_levels, stack_cols)
 
     # Construct the correct MultiIndex by combining the frame's index and
     # stacked columns.
+    ratio = 0 if frame.empty else len(result) // len(frame)
+
     index_levels: list | FrozenList
     if isinstance(frame.index, MultiIndex):
         index_levels = frame.index.levels
@@ -1002,12 +955,22 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
         codes, uniques = factorize(frame.index, use_na_sentinel=False)
         index_levels = [uniques]
         index_codes = list(np.tile(codes, (1, ratio)))
+
+    if len(level) > 1:
+        # Arrange columns in the order we want to take them, e.g. level=[2, 0, 1]
+        sorter = np.argsort(level)
+        assert isinstance(stack_cols, MultiIndex)
+        ordered_stack_cols = stack_cols._reorder_ilevels(sorter)
+    else:
+        ordered_stack_cols = stack_cols
+    ordered_stack_cols_unique = ordered_stack_cols.unique()
     if isinstance(ordered_stack_cols, MultiIndex):
         column_levels = ordered_stack_cols.levels
         column_codes = ordered_stack_cols.drop_duplicates().codes
     else:
-        column_levels = [ordered_stack_cols.unique()]
+        column_levels = [ordered_stack_cols_unique]
         column_codes = [factorize(ordered_stack_cols_unique, use_na_sentinel=False)[0]]
+
     # error: Incompatible types in assignment (expression has type "list[ndarray[Any,
     # dtype[Any]]]", variable has type "FrozenList")
     column_codes = [np.repeat(codes, len(frame)) for codes in column_codes]  # type: ignore[assignment]
@@ -1035,3 +998,76 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
         result.name = None
 
     return result
+
+
+def stack_reshape(
+    frame: DataFrame, level: list[int], set_levels: set[int], stack_cols: Index
+) -> Series | DataFrame:
+    """Reshape the data of a frame for stack.
+
+    This function takes care of most of the work that stack needs to do. Caller
+    will sort the result once the appropriate index is set.
+
+    Parameters
+    ----------
+    frame : DataFrame
+        DataFrame that is to be stacked.
+    level : list of ints
+        Levels of the columns to stack.
+    set_levels : set of ints
+        Same as level, but as a set.
+    stack_cols : Index
+        Columns of the result when the DataFrame is stacked.
+
+    Returns
+    -------
+    The data behind the stacked DataFrame.
+    """
+    # non-MultiIndex takes a fast path.
+    assert isinstance(frame.columns, MultiIndex)
+    # If we need to drop `level` from columns, it needs to be in descending order
+    drop_levnums = sorted(level, reverse=True)
+
+    # Grab data for each unique index to be stacked
+    buf = []
+    for idx in stack_cols.unique():
+        if len(frame.columns) == 1:
+            data = frame.copy()
+        else:
+            # Take the data from frame corresponding to this idx value
+            if len(level) == 1:
+                idx = (idx,)
+            gen = iter(idx)
+            column_indexer = tuple(
+                next(gen) if k in set_levels else slice(None)
+                for k in range(frame.columns.nlevels)
+            )
+            data = frame.loc[:, column_indexer]
+
+        if len(level) < frame.columns.nlevels:
+            data.columns = data.columns._drop_level_numbers(drop_levnums)
+        elif stack_cols.nlevels == 1:
+            if data.ndim == 1:
+                data.name = 0
+            else:
+                data.columns = RangeIndex(len(data.columns))
+        buf.append(data)
+
+    if len(buf) > 0 and not frame.empty:
+        result = concat(buf, ignore_index=True)
+    else:
+        # input is empty
+        if len(level) < frame.columns.nlevels:
+            # concat column order may be different from dropping the levels
+            new_columns = frame.columns._drop_level_numbers(drop_levnums).unique()
+        else:
+            new_columns = [0]
+        result = DataFrame(columns=new_columns, dtype=frame._values.dtype)
+
+    if len(level) < frame.columns.nlevels:
+        # concat column order may be different from dropping the levels
+        desired_columns = frame.columns._drop_level_numbers(drop_levnums).unique()
+        if not result.columns.equals(desired_columns):
+            result = result[desired_columns]
+
+    return result

From bf5d60bcb4a559446ff2cfb88d23d69cb4b53ec7 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Mon, 20 May 2024 21:32:05 -0400
Subject: [PATCH 2/6] WIP

---
 pandas/core/reshape/reshape.py           | 2 +-
 pandas/tests/extension/base/reshaping.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 16196ef92e417..9ec0be22164e0 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -939,7 +939,7 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
             dtype = frame._mgr.arrays[0].dtype
         else:
             dtype = None
-        result = Series(frame._values.ravel(order="F"), dtype=dtype)
+        result = frame._constructor_sliced(frame._values.ravel(order="F"), dtype=dtype)
     else:
         result = stack_reshape(frame, level, set_levels, stack_cols)
 
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index 489cd15644d04..4d3358115d9eb 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -266,7 +266,7 @@ def test_stack(self, data, columns, future_stack):
         expected = expected.astype(object)
 
         if isinstance(expected, pd.Series):
-            assert result.dtype == df.iloc[:, 0].dtype
+            assert result.dtype == df.iloc[:, 0].dtype, f'{type(result.dtype), result.dtype} vs {type(df.iloc[:, 0].dtype), df.iloc[:, 0].dtype}'
         else:
             assert all(result.dtypes == df.iloc[:, 0].dtype)
 

From ea4d614003c5b9b41058e300a65ab7e294750464 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Thu, 23 May 2024 22:07:05 -0400
Subject: [PATCH 3/6] Use reshape instead of ravel

---
 pandas/core/reshape/reshape.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 9ec0be22164e0..4b05f724922c3 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -939,7 +939,9 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
             dtype = frame._mgr.arrays[0].dtype
         else:
             dtype = None
-        result = frame._constructor_sliced(frame._values.ravel(order="F"), dtype=dtype)
+        result = frame._constructor_sliced(
+            frame._values.reshape(-1, order="F"), dtype=dtype
+        )
     else:
         result = stack_reshape(frame, level, set_levels, stack_cols)
 

From 348fbebc3634693cbb267efeb029066ae83b3aa8 Mon Sep 17 00:00:00 2001
From: richard <rhshadrach@gmail.com>
Date: Thu, 23 May 2024 22:46:40 -0400
Subject: [PATCH 4/6] arrays -> blocks

---
 pandas/core/reshape/reshape.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 76ecccfd7a4ad..d94069032c541 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -942,7 +942,7 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
         # When columns are homogeneous EAs, we pass through object
         # dtype but this is still faster than the normal path.
         if len(frame.columns) > 0 and frame._is_homogeneous_type:
-            dtype = frame._mgr.arrays[0].dtype
+            dtype = frame._mgr.blocks[0].dtype
         else:
             dtype = None
         result = frame._constructor_sliced(

From de694868e5e87b798c1dfcf3cde3e4aa254a2774 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 2 Jun 2024 10:07:03 -0400
Subject: [PATCH 5/6] Update test

---
 pandas/core/reshape/reshape.py           |  2 +-
 pandas/tests/extension/base/reshaping.py | 10 +++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index d94069032c541..09d84255c04ba 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -940,7 +940,7 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
     if not isinstance(frame.columns, MultiIndex):
         # Fast path when we're stacking the columns of a non-MultiIndex.
         # When columns are homogeneous EAs, we pass through object
-        # dtype but this is still faster than the normal path.
+        # dtype but this is still slightly faster than the normal path.
         if len(frame.columns) > 0 and frame._is_homogeneous_type:
             dtype = frame._mgr.blocks[0].dtype
         else:
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index 4d3358115d9eb..c105a232b5fa4 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas.core.dtypes.dtypes import NumpyEADtype
+
 import pandas as pd
 import pandas._testing as tm
 from pandas.api.extensions import ExtensionArray
@@ -266,7 +268,13 @@ def test_stack(self, data, columns, future_stack):
         expected = expected.astype(object)
 
         if isinstance(expected, pd.Series):
-            assert result.dtype == df.iloc[:, 0].dtype, f'{type(result.dtype), result.dtype} vs {type(df.iloc[:, 0].dtype), df.iloc[:, 0].dtype}'
+            if future_stack and isinstance(data.dtype, NumpyEADtype):
+                # future_stack=True constructs the result specifying the dtype
+                # using the dtype of the input; we thus get the underlying
+                # NumPy dtype as the result instead of the NumpyExtensionArray
+                assert result.dtype == df.iloc[:, 0].to_numpy().dtype
+            else:
+                assert result.dtype == df.iloc[:, 0].dtype
         else:
             assert all(result.dtypes == df.iloc[:, 0].dtype)
 

From 8feda0e984c04d7be6e7d1481314990eb0964b8f Mon Sep 17 00:00:00 2001
From: Richard Shadrach <rhshadrach@gmail.com>
Date: Sun, 2 Jun 2024 10:13:43 -0400
Subject: [PATCH 6/6] whatsnew

---
 doc/source/whatsnew/v3.0.0.rst           | 1 +
 pandas/core/reshape/reshape.py           | 2 +-
 pandas/tests/extension/base/reshaping.py | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 865996bdf8892..be89b08d2b3f0 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -432,6 +432,7 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
 - Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
+- Performance improvement in :meth:`DataFrame.stack` when using ``future_stack=True`` and the DataFrame does not have a :class:`MultiIndex` (:issue:`58391`)
 - Performance improvement in :meth:`to_hdf` avoid unnecessary reopenings of the HDF5 file to speedup data addition to files with a very large number of groups . (:issue:`58248`)
 - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
 - Performance improvement in indexing operations for string dtypes (:issue:`56997`)
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 09d84255c04ba..5cb2edc1fa912 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -938,7 +938,7 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
 
     result: Series | DataFrame
     if not isinstance(frame.columns, MultiIndex):
-        # Fast path when we're stacking the columns of a non-MultiIndex.
+        # GH#58817 Fast path when we're stacking the columns of a non-MultiIndex.
         # When columns are homogeneous EAs, we pass through object
         # dtype but this is still slightly faster than the normal path.
         if len(frame.columns) > 0 and frame._is_homogeneous_type:
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index c105a232b5fa4..e6887d80cf8c1 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -269,7 +269,7 @@ def test_stack(self, data, columns, future_stack):
 
         if isinstance(expected, pd.Series):
             if future_stack and isinstance(data.dtype, NumpyEADtype):
-                # future_stack=True constructs the result specifying the dtype
+                # GH#58817 future_stack=True constructs the result specifying the dtype
                 # using the dtype of the input; we thus get the underlying
                 # NumPy dtype as the result instead of the NumpyExtensionArray
                 assert result.dtype == df.iloc[:, 0].to_numpy().dtype