Skip to content

Commit a500311

Browse files
committed
Merge branch 'main' into enh-stata-non-nano
2 parents 227b7f7 + 92fa9ca commit a500311

File tree

333 files changed

+6368
-4646
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

333 files changed

+6368
-4646
lines changed

.github/workflows/comment-commands.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ jobs:
7777
echo 'EOF' >> $GITHUB_ENV
7878
echo "REGEX=$REGEX" >> $GITHUB_ENV
7979
80-
- uses: actions/github-script@v6
80+
- uses: actions/github-script@v7
8181
env:
8282
BENCH_OUTPUT: ${{env.BENCH_OUTPUT}}
8383
REGEX: ${{env.REGEX}}

.github/workflows/deprecation-tracking-bot.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
env:
2222
DEPRECATION_TRACKER_ISSUE: 50578
2323
steps:
24-
- uses: actions/github-script@v6
24+
- uses: actions/github-script@v7
2525
id: update-deprecation-issue
2626
with:
2727
script: |

.github/workflows/unit-tests.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ defaults:
2323
jobs:
2424
ubuntu:
2525
runs-on: ubuntu-22.04
26-
timeout-minutes: 180
26+
timeout-minutes: 90
2727
strategy:
2828
matrix:
2929
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml]
@@ -177,7 +177,7 @@ jobs:
177177
if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}}
178178

179179
macos-windows:
180-
timeout-minutes: 180
180+
timeout-minutes: 90
181181
strategy:
182182
matrix:
183183
os: [macos-latest, windows-latest]
@@ -322,7 +322,7 @@ jobs:
322322
matrix:
323323
os: [ubuntu-22.04, macOS-latest, windows-latest]
324324

325-
timeout-minutes: 180
325+
timeout-minutes: 90
326326

327327
concurrency:
328328
#https://github.community/t/concurrecy-not-work-for-push/183068/7

asv_bench/asv.conf.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
// pip (with all the conda available packages installed first,
4242
// followed by the pip installed packages).
4343
"matrix": {
44-
"Cython": ["0.29.33"],
44+
"Cython": ["3.0.5"],
4545
"matplotlib": [],
4646
"sqlalchemy": [],
4747
"scipy": [],

asv_bench/benchmarks/algorithms.py

+47-53
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from importlib import import_module
22

33
import numpy as np
4-
import pyarrow as pa
54

65
import pandas as pd
76

@@ -20,9 +19,9 @@ class Factorize:
2019
[True, False],
2120
[True, False],
2221
[
23-
"int",
24-
"uint",
25-
"float",
22+
"int64",
23+
"uint64",
24+
"float64",
2625
"object",
2726
"object_str",
2827
"datetime64[ns]",
@@ -36,28 +35,24 @@ class Factorize:
3635

3736
def setup(self, unique, sort, dtype):
3837
N = 10**5
39-
string_index = tm.makeStringIndex(N)
40-
string_arrow = None
41-
if dtype == "string[pyarrow]":
42-
try:
43-
string_arrow = pd.array(string_index, dtype="string[pyarrow]")
44-
except ImportError:
45-
raise NotImplementedError
46-
47-
data = {
48-
"int": pd.Index(np.arange(N), dtype="int64"),
49-
"uint": pd.Index(np.arange(N), dtype="uint64"),
50-
"float": pd.Index(np.random.randn(N), dtype="float64"),
51-
"object_str": string_index,
52-
"object": pd.Index(np.arange(N), dtype="object"),
53-
"datetime64[ns]": pd.date_range("2011-01-01", freq="h", periods=N),
54-
"datetime64[ns, tz]": pd.date_range(
55-
"2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
56-
),
57-
"Int64": pd.array(np.arange(N), dtype="Int64"),
58-
"boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
59-
"string[pyarrow]": string_arrow,
60-
}[dtype]
38+
39+
if dtype in ["int64", "uint64", "Int64", "object"]:
40+
data = pd.Index(np.arange(N), dtype=dtype)
41+
elif dtype == "float64":
42+
data = pd.Index(np.random.randn(N), dtype=dtype)
43+
elif dtype == "boolean":
44+
data = pd.array(np.random.randint(0, 2, N), dtype=dtype)
45+
elif dtype == "datetime64[ns]":
46+
data = pd.date_range("2011-01-01", freq="h", periods=N)
47+
elif dtype == "datetime64[ns, tz]":
48+
data = pd.date_range("2011-01-01", freq="h", periods=N, tz="Asia/Tokyo")
49+
elif dtype == "object_str":
50+
data = tm.makeStringIndex(N)
51+
elif dtype == "string[pyarrow]":
52+
data = pd.array(tm.makeStringIndex(N), dtype="string[pyarrow]")
53+
else:
54+
raise NotImplementedError
55+
6156
if not unique:
6257
data = data.repeat(5)
6358
self.data = data
@@ -74,9 +69,9 @@ class Duplicated:
7469
[True, False],
7570
["first", "last", False],
7671
[
77-
"int",
78-
"uint",
79-
"float",
72+
"int64",
73+
"uint64",
74+
"float64",
8075
"string",
8176
"datetime64[ns]",
8277
"datetime64[ns, tz]",
@@ -88,22 +83,20 @@ class Duplicated:
8883

8984
def setup(self, unique, keep, dtype):
9085
N = 10**5
91-
data = {
92-
"int": pd.Index(np.arange(N), dtype="int64"),
93-
"uint": pd.Index(np.arange(N), dtype="uint64"),
94-
"float": pd.Index(np.random.randn(N), dtype="float64"),
95-
"string": tm.makeStringIndex(N),
96-
"datetime64[ns]": pd.date_range("2011-01-01", freq="h", periods=N),
97-
"datetime64[ns, tz]": pd.date_range(
98-
"2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
99-
),
100-
"timestamp[ms][pyarrow]": pd.Index(
101-
np.arange(N), dtype=pd.ArrowDtype(pa.timestamp("ms"))
102-
),
103-
"duration[s][pyarrow]": pd.Index(
104-
np.arange(N), dtype=pd.ArrowDtype(pa.duration("s"))
105-
),
106-
}[dtype]
86+
if dtype in ["int64", "uint64"]:
87+
data = pd.Index(np.arange(N), dtype=dtype)
88+
elif dtype == "float64":
89+
data = pd.Index(np.random.randn(N), dtype="float64")
90+
elif dtype == "string":
91+
data = tm.makeStringIndex(N)
92+
elif dtype == "datetime64[ns]":
93+
data = pd.date_range("2011-01-01", freq="h", periods=N)
94+
elif dtype == "datetime64[ns, tz]":
95+
data = pd.date_range("2011-01-01", freq="h", periods=N, tz="Asia/Tokyo")
96+
elif dtype in ["timestamp[ms][pyarrow]", "duration[s][pyarrow]"]:
97+
data = pd.Index(np.arange(N), dtype=dtype)
98+
else:
99+
raise NotImplementedError
107100
if not unique:
108101
data = data.repeat(5)
109102
self.idx = data
@@ -181,21 +174,22 @@ class Quantile:
181174
params = [
182175
[0, 0.5, 1],
183176
["linear", "nearest", "lower", "higher", "midpoint"],
184-
["float", "int", "uint"],
177+
["float64", "int64", "uint64"],
185178
]
186179
param_names = ["quantile", "interpolation", "dtype"]
187180

188181
def setup(self, quantile, interpolation, dtype):
189182
N = 10**5
190-
data = {
191-
"int": np.arange(N),
192-
"uint": np.arange(N).astype(np.uint64),
193-
"float": np.random.randn(N),
194-
}
195-
self.idx = pd.Series(data[dtype].repeat(5))
183+
if dtype in ["int64", "uint64"]:
184+
data = np.arange(N, dtype=dtype)
185+
elif dtype == "float64":
186+
data = np.random.randn(N)
187+
else:
188+
raise NotImplementedError
189+
self.ser = pd.Series(data.repeat(5))
196190

197191
def time_quantile(self, quantile, interpolation, dtype):
198-
self.idx.quantile(quantile, interpolation=interpolation)
192+
self.ser.quantile(quantile, interpolation=interpolation)
199193

200194

201195
class SortIntegerArray:

asv_bench/benchmarks/array.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ def time_from_float_array(self):
3131
class IntegerArray:
3232
def setup(self):
3333
N = 250_000
34-
self.values_integer = np.array([1, 0, 1, 0] * N)
35-
self.data = np.array([1, 2, 3, 4] * N, dtype="int64")
36-
self.mask = np.array([False, False, True, False] * N)
34+
self.values_integer = np.tile(np.array([1, 0, 1, 0]), N)
35+
self.data = np.tile(np.array([1, 2, 3, 4], dtype="int64"), N)
36+
self.mask = np.tile(np.array([False, False, True, False]), N)
3737

3838
def time_constructor(self):
3939
pd.arrays.IntegerArray(self.data, self.mask)

asv_bench/benchmarks/categoricals.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -260,18 +260,16 @@ class CategoricalSlicing:
260260
def setup(self, index):
261261
N = 10**6
262262
categories = ["a", "b", "c"]
263-
values = [0] * N + [1] * N + [2] * N
264263
if index == "monotonic_incr":
265-
self.data = pd.Categorical.from_codes(values, categories=categories)
264+
codes = np.repeat([0, 1, 2], N)
266265
elif index == "monotonic_decr":
267-
self.data = pd.Categorical.from_codes(
268-
list(reversed(values)), categories=categories
269-
)
266+
codes = np.repeat([2, 1, 0], N)
270267
elif index == "non_monotonic":
271-
self.data = pd.Categorical.from_codes([0, 1, 2] * N, categories=categories)
268+
codes = np.tile([0, 1, 2], N)
272269
else:
273270
raise ValueError(f"Invalid index param: {index}")
274271

272+
self.data = pd.Categorical.from_codes(codes, categories=categories)
275273
self.scalar = 10000
276274
self.list = list(range(10000))
277275
self.cat_scalar = "b"

asv_bench/benchmarks/frame_methods.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -640,7 +640,8 @@ def time_frame_nunique(self):
640640

641641
class SeriesNuniqueWithNan:
642642
def setup(self):
643-
self.ser = Series(100000 * (100 * [np.nan] + list(range(100)))).astype(float)
643+
values = 100 * [np.nan] + list(range(100))
644+
self.ser = Series(np.tile(values, 10000), dtype=float)
644645

645646
def time_series_nunique_nan(self):
646647
self.ser.nunique()

asv_bench/benchmarks/gil.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -272,18 +272,20 @@ class ParallelReadCSV(BaseIO):
272272
def setup(self, dtype):
273273
rows = 10000
274274
cols = 50
275-
data = {
276-
"float": DataFrame(np.random.randn(rows, cols)),
277-
"datetime": DataFrame(
275+
if dtype == "float":
276+
df = DataFrame(np.random.randn(rows, cols))
277+
elif dtype == "datetime":
278+
df = DataFrame(
278279
np.random.randn(rows, cols), index=date_range("1/1/2000", periods=rows)
279-
),
280-
"object": DataFrame(
280+
)
281+
elif dtype == "object":
282+
df = DataFrame(
281283
"foo", index=range(rows), columns=["object%03d" for _ in range(5)]
282-
),
283-
}
284+
)
285+
else:
286+
raise NotImplementedError
284287

285288
self.fname = f"__test_{dtype}__.csv"
286-
df = data[dtype]
287289
df.to_csv(self.fname)
288290

289291
@test_parallel(num_threads=2)

asv_bench/benchmarks/groupby.py

+46-1
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,7 @@ def setup(self, dtype, tie_method):
713713
if dtype == "datetime64":
714714
data = np.array([Timestamp("2011/01/01")] * N, dtype=dtype)
715715
else:
716-
data = np.array([1] * N, dtype=dtype)
716+
data = np.ones(N, dtype=dtype)
717717
self.df = DataFrame({"values": data, "key": ["foo"] * N})
718718

719719
def time_rank_ties(self, dtype, tie_method):
@@ -802,6 +802,51 @@ def time_groupby_extra_cat_nosort(self, observed):
802802
self.df_extra_cat.groupby("a", observed=observed, sort=False)["b"].count()
803803

804804

805+
class MultipleCategories:
806+
def setup(self):
807+
N = 10**3
808+
arr = np.random.random(N)
809+
data = {
810+
"a1": Categorical(np.random.randint(10000, size=N)),
811+
"a2": Categorical(np.random.randint(10000, size=N)),
812+
"b": arr,
813+
}
814+
self.df = DataFrame(data)
815+
data = {
816+
"a1": Categorical(np.random.randint(10000, size=N), ordered=True),
817+
"a2": Categorical(np.random.randint(10000, size=N), ordered=True),
818+
"b": arr,
819+
}
820+
self.df_ordered = DataFrame(data)
821+
data = {
822+
"a1": Categorical(np.random.randint(100, size=N), categories=np.arange(N)),
823+
"a2": Categorical(np.random.randint(100, size=N), categories=np.arange(N)),
824+
"b": arr,
825+
}
826+
self.df_extra_cat = DataFrame(data)
827+
828+
def time_groupby_sort(self):
829+
self.df.groupby(["a1", "a2"], observed=False)["b"].count()
830+
831+
def time_groupby_nosort(self):
832+
self.df.groupby(["a1", "a2"], observed=False, sort=False)["b"].count()
833+
834+
def time_groupby_ordered_sort(self):
835+
self.df_ordered.groupby(["a1", "a2"], observed=False)["b"].count()
836+
837+
def time_groupby_ordered_nosort(self):
838+
self.df_ordered.groupby(["a1", "a2"], observed=False, sort=False)["b"].count()
839+
840+
def time_groupby_extra_cat_sort(self):
841+
self.df_extra_cat.groupby(["a1", "a2"], observed=False)["b"].count()
842+
843+
def time_groupby_extra_cat_nosort(self):
844+
self.df_extra_cat.groupby(["a1", "a2"], observed=False, sort=False)["b"].count()
845+
846+
def time_groupby_transform(self):
847+
self.df_extra_cat.groupby(["a1", "a2"], observed=False)["b"].cumsum()
848+
849+
805850
class Datelike:
806851
# GH 14338
807852
params = ["period_range", "date_range", "date_range_tz"]

asv_bench/benchmarks/index_object.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,7 @@ def setup(self, dtype):
161161
self.sorted = self.idx.sort_values()
162162
half = N // 2
163163
self.non_unique = self.idx[:half].append(self.idx[:half])
164-
self.non_unique_sorted = (
165-
self.sorted[:half].append(self.sorted[:half]).sort_values()
166-
)
164+
self.non_unique_sorted = self.sorted[:half].repeat(2)
167165
self.key = self.sorted[N // 4]
168166

169167
def time_boolean_array(self, dtype):

asv_bench/benchmarks/indexing.py

+16
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,10 @@ def time_loc_null_slice_plus_slice(self, unique_levels):
306306
target = (self.tgt_null_slice, self.tgt_slice)
307307
self.df.loc[target, :]
308308

309+
def time_loc_multiindex(self, unique_levels):
310+
target = self.df.index[::10]
311+
self.df.loc[target]
312+
309313
def time_xs_level_0(self, unique_levels):
310314
target = self.tgt_scalar
311315
self.df.xs(target, level=0)
@@ -515,6 +519,18 @@ def time_setitem_list(self):
515519
self.df[[100, 200, 300]] = 100
516520

517521

522+
class SetitemObjectDtype:
523+
# GH#19299
524+
525+
def setup(self):
526+
N = 1000
527+
cols = 500
528+
self.df = DataFrame(index=range(N), columns=range(cols), dtype=object)
529+
530+
def time_setitem_object_dtype(self):
531+
self.df.loc[0, 1] = 1.0
532+
533+
518534
class ChainIndexing:
519535
params = [None, "warn"]
520536
param_names = ["mode"]

0 commit comments

Comments
 (0)