From 22c7edb930cf26234bd3d7cdc60013a20de3f8c0 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 9 Apr 2024 14:42:26 -0700
Subject: [PATCH 1/6] Use better data structures

---
 pandas/core/frame.py   | 23 ++++++++++++-----------
 pandas/core/generic.py |  2 +-
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 76df5c82e6239..db6ece019e191 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3700,7 +3700,7 @@ def transpose(
         nv.validate_transpose(args, {})
         # construct the args
 
-        dtypes = list(self.dtypes)
+        first_dtype = next(self.dtypes, None)
 
         if self._can_fast_transpose:
             # Note: tests pass without this, but this improves perf quite a bit.
@@ -3718,11 +3718,11 @@ def transpose(
 
         elif (
             self._is_homogeneous_type
-            and dtypes
-            and isinstance(dtypes[0], ExtensionDtype)
+            and first_dtype is not None
+            and isinstance(first_dtype, ExtensionDtype)
         ):
             new_values: list
-            if isinstance(dtypes[0], BaseMaskedDtype):
+            if isinstance(first_dtype, BaseMaskedDtype):
                 # We have masked arrays with the same dtype. We can transpose faster.
                 from pandas.core.arrays.masked import (
                     transpose_homogeneous_masked_arrays,
@@ -3731,7 +3731,7 @@ def transpose(
                 new_values = transpose_homogeneous_masked_arrays(
                     cast(Sequence[BaseMaskedArray], self._iter_column_arrays())
                 )
-            elif isinstance(dtypes[0], ArrowDtype):
+            elif isinstance(first_dtype, ArrowDtype):
                 # We have arrow EAs with the same dtype. We can transpose faster.
                 from pandas.core.arrays.arrow.array import (
                     ArrowExtensionArray,
@@ -3743,10 +3743,11 @@ def transpose(
                 )
             else:
                 # We have other EAs with the same dtype. We preserve dtype in transpose.
-                dtyp = dtypes[0]
-                arr_typ = dtyp.construct_array_type()
+                arr_typ = first_dtype.construct_array_type()
                 values = self.values
-                new_values = [arr_typ._from_sequence(row, dtype=dtyp) for row in values]
+                new_values = [
+                    arr_typ._from_sequence(row, dtype=first_dtype) for row in values
+                ]
 
             result = type(self)._from_arrays(
                 new_values,
@@ -5875,7 +5876,7 @@ def set_index(
             else:
                 arrays.append(self.index)
 
-        to_remove: list[Hashable] = []
+        to_remove: set[Hashable] = set()
         for col in keys:
             if isinstance(col, MultiIndex):
                 arrays.extend(col._get_level_values(n) for n in range(col.nlevels))
@@ -5902,7 +5903,7 @@ def set_index(
                 arrays.append(frame[col])
                 names.append(col)
                 if drop:
-                    to_remove.append(col)
+                    to_remove.add(col)
 
             if len(arrays[-1]) != len(self):
                 # check newest element against length of calling frame, since
@@ -5919,7 +5920,7 @@ def set_index(
             raise ValueError(f"Index has duplicate keys: {duplicates}")
 
         # use set to handle duplicate column names gracefully in case of drop
-        for c in set(to_remove):
+        for c in to_remove:
             del frame[c]
 
         # clear up memory usage
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 8af9503a3691d..398adebd1e278 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2045,7 +2045,7 @@ def __setstate__(self, state) -> None:
                 # e.g. say fill_value needing _mgr to be
                 # defined
                 meta = set(self._internal_names + self._metadata)
-                for k in list(meta):
+                for k in meta:
                     if k in state and k != "_flags":
                         v = state[k]
                         object.__setattr__(self, k, v)

From 0e7fa5efd93471b3f8982f385c0782f33ec3bc88 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 9 Apr 2024 16:03:42 -0700
Subject: [PATCH 2/6] Use generator and set

---
 pandas/core/frame.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index db6ece019e191..c6ebe183df37e 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2296,8 +2296,8 @@ def maybe_reorder(
                     exclude.update(index)
 
         if any(exclude):
-            arr_exclude = [x for x in exclude if x in arr_columns]
-            to_remove = [arr_columns.get_loc(col) for col in arr_exclude]
+            arr_exclude = (x for x in exclude if x in arr_columns)
+            to_remove = {arr_columns.get_loc(col) for col in arr_exclude}
             arrays = [v for i, v in enumerate(arrays) if i not in to_remove]
 
             columns = columns.drop(exclude)

From fa086fc4bfe4a9bb6b3e42cd076690ef9f304d1a Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 9 Apr 2024 16:18:44 -0700
Subject: [PATCH 3/6] Move sorted to exception block, use set instead of list

---
 pandas/core/internals/construction.py | 15 +++++++--------
 pandas/core/tools/datetimes.py        |  8 ++++----
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 73b93110c9018..cea52bf8c91b2 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -567,7 +567,7 @@ def _extract_index(data) -> Index:
     if len(data) == 0:
         return default_index(0)
 
-    raw_lengths = []
+    raw_lengths = set()
     indexes: list[list[Hashable] | Index] = []
 
     have_raw_arrays = False
@@ -583,7 +583,7 @@ def _extract_index(data) -> Index:
             indexes.append(list(val.keys()))
         elif is_list_like(val) and getattr(val, "ndim", 1) == 1:
             have_raw_arrays = True
-            raw_lengths.append(len(val))
+            raw_lengths.add(len(val))
         elif isinstance(val, np.ndarray) and val.ndim > 1:
             raise ValueError("Per-column arrays must each be 1-dimensional")
 
@@ -596,24 +596,23 @@ def _extract_index(data) -> Index:
         index = union_indexes(indexes, sort=False)
 
     if have_raw_arrays:
-        lengths = list(set(raw_lengths))
-        if len(lengths) > 1:
+        if len(raw_lengths) > 1:
             raise ValueError("All arrays must be of the same length")
 
         if have_dicts:
             raise ValueError(
                 "Mixing dicts with non-Series may lead to ambiguous ordering."
             )
-
+        raw_length = raw_lengths.pop()
         if have_series:
-            if lengths[0] != len(index):
+            if raw_length != len(index):
                 msg = (
-                    f"array length {lengths[0]} does not match index "
+                    f"array length {raw_length} does not match index "
                     f"length {len(index)}"
                 )
                 raise ValueError(msg)
         else:
-            index = default_index(lengths[0])
+            index = default_index(raw_length)
 
     return ensure_index(index)
 
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 2aeb1aff07a54..df7a6cdb1ea52 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -1124,18 +1124,18 @@ def f(value):
 
     # we require at least Ymd
     required = ["year", "month", "day"]
-    req = sorted(set(required) - set(unit_rev.keys()))
+    req = set(required) - set(unit_rev.keys())
     if len(req):
-        _required = ",".join(req)
+        _required = ",".join(sorted(req))
         raise ValueError(
             "to assemble mappings requires at least that "
             f"[year, month, day] be specified: [{_required}] is missing"
         )
 
     # keys we don't recognize
-    excess = sorted(set(unit_rev.keys()) - set(_unit_map.values()))
+    excess = set(unit_rev.keys()) - set(_unit_map.values())
     if len(excess):
-        _excess = ",".join(excess)
+        _excess = ",".join(sorted(excess))
         raise ValueError(
             f"extra keys have been passed to the datetime assemblage: [{_excess}]"
         )

From c2142d3339d7a88d0213dc3d66fd2d933bb72ebe Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 15 Apr 2024 11:28:53 -0700
Subject: [PATCH 4/6] Another iterator, use iter

---
 pandas/_libs/tslibs/offsets.pyx | 10 ++++------
 pandas/core/frame.py            |  2 +-
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
index e36abdf0ad971..00daeff21c425 100644
--- a/pandas/_libs/tslibs/offsets.pyx
+++ b/pandas/_libs/tslibs/offsets.pyx
@@ -219,8 +219,7 @@ cdef _get_calendar(weekmask, holidays, calendar):
         holidays = holidays + calendar.holidays().tolist()
     except AttributeError:
         pass
-    holidays = [_to_dt64D(dt) for dt in holidays]
-    holidays = tuple(sorted(holidays))
+    holidays = tuple(sorted(_to_dt64D(dt) for dt in holidays))
 
     kwargs = {"weekmask": weekmask}
     if holidays:
@@ -420,10 +419,9 @@ cdef class BaseOffset:
         if "holidays" in all_paras and not all_paras["holidays"]:
             all_paras.pop("holidays")
         exclude = ["kwds", "name", "calendar"]
-        attrs = [(k, v) for k, v in all_paras.items()
-                 if (k not in exclude) and (k[0] != "_")]
-        attrs = sorted(set(attrs))
-        params = tuple([str(type(self))] + attrs)
+        attrs = {(k, v) for k, v in all_paras.items()
+                 if (k not in exclude) and (k[0] != "_")}
+        params = tuple([str(type(self))] + sorted(attrs))
         return params
 
     @property
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 50b3e62e9256e..562927967a536 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3705,7 +3705,7 @@ def transpose(
         nv.validate_transpose(args, {})
         # construct the args
 
-        first_dtype = next(self.dtypes, None)
+        first_dtype = next(iter(self.dtypes), None)
 
         if self._can_fast_transpose:
             # Note: tests pass without this, but this improves perf quite a bit.

From a61895bd5dd88cc5ea56e4816690b57eb1d63846 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 15 Apr 2024 11:35:34 -0700
Subject: [PATCH 5/6] Another set

---
 pandas/_libs/tslibs/offsets.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
index 00daeff21c425..107608ec9f606 100644
--- a/pandas/_libs/tslibs/offsets.pyx
+++ b/pandas/_libs/tslibs/offsets.pyx
@@ -418,7 +418,7 @@ cdef class BaseOffset:
 
         if "holidays" in all_paras and not all_paras["holidays"]:
             all_paras.pop("holidays")
-        exclude = ["kwds", "name", "calendar"]
+        exclude = {"kwds", "name", "calendar"}
         attrs = {(k, v) for k, v in all_paras.items()
                  if (k not in exclude) and (k[0] != "_")}
         params = tuple([str(type(self))] + sorted(attrs))

From 77e8b11054726ccec0e07919cfa9612b3d1471d1 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:41:11 -0700
Subject: [PATCH 6/6] Don't use iterator protocol

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 61f6d6b23f4bc..b65a00db7d7df 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3705,7 +3705,7 @@ def transpose(
         nv.validate_transpose(args, {})
         # construct the args
 
-        first_dtype = next(iter(self.dtypes), None)
+        first_dtype = self.dtypes.iloc[0] if len(self.columns) else None
 
         if self._can_fast_transpose:
             # Note: tests pass without this, but this improves perf quite a bit.