
Commit 95bdf96
Merge branch 'main' into cow-warning-chained
2 parents: 1006fdd + 4514636

406 files changed: +7462, -4895 lines

.github/workflows/broken-linkcheck.yml (new file, +38)

@@ -0,0 +1,38 @@
+name: Linkcheck
+on:
+  schedule:
+    # Run monthly on the 1st day of the month
+    - cron: '0 0 1 * *'
+  pull_request:
+    paths:
+      - ".github/workflows/broken-linkcheck.yml"
+      - "doc/make.py"
+jobs:
+  linkcheck:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash -el {0}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Conda
+        uses: ./.github/actions/setup-conda
+
+      - name: Build Pandas
+        uses: ./.github/actions/build_pandas
+
+      - name: Run linkcheck script
+        working-directory: ./doc
+        run: |
+          set -o pipefail
+          python make.py linkcheck | tee linkcheck.txt
+
+      - name: Display broken links
+        if: failure()
+        working-directory: ./doc
+        run: grep broken linkcheck.txt
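The last step of the workflow above only greps the saved report for lines mentioning broken links. As a rough local equivalent (a sketch only, not part of the commit; the helper name and the doc/linkcheck.txt report path are assumptions), the same filtering can be done in Python:

# Hypothetical helper mirroring the "Display broken links" step above;
# assumes `python make.py linkcheck | tee linkcheck.txt` was run in ./doc.
from pathlib import Path

def show_broken_links(report: Path = Path("doc/linkcheck.txt")) -> list[str]:
    # Keep only report lines that mention broken links, as `grep broken` does.
    broken = [line for line in report.read_text().splitlines() if "broken" in line]
    for line in broken:
        print(line)
    return broken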

.github/workflows/unit-tests.yml (+3, -3)

@@ -23,7 +23,7 @@ defaults:
 jobs:
   ubuntu:
     runs-on: ubuntu-22.04
-    timeout-minutes: 180
+    timeout-minutes: 90
     strategy:
       matrix:
         env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml]
@@ -177,7 +177,7 @@ jobs:
     if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}}

   macos-windows:
-    timeout-minutes: 180
+    timeout-minutes: 90
     strategy:
       matrix:
         os: [macos-latest, windows-latest]
@@ -322,7 +322,7 @@ jobs:
       matrix:
         os: [ubuntu-22.04, macOS-latest, windows-latest]

-    timeout-minutes: 180
+    timeout-minutes: 90

     concurrency:
       #https://github.community/t/concurrecy-not-work-for-push/183068/7

asv_bench/asv.conf.json (+1, -1)

@@ -41,7 +41,7 @@
     // pip (with all the conda available packages installed first,
     // followed by the pip installed packages).
     "matrix": {
-        "Cython": ["0.29.33"],
+        "Cython": ["3.0.5"],
         "matplotlib": [],
         "sqlalchemy": [],
         "scipy": [],

asv_bench/benchmarks/algorithms.py (+47, -53)

@@ -1,7 +1,6 @@
 from importlib import import_module

 import numpy as np
-import pyarrow as pa

 import pandas as pd

@@ -20,9 +19,9 @@ class Factorize:
         [True, False],
         [True, False],
         [
-            "int",
-            "uint",
-            "float",
+            "int64",
+            "uint64",
+            "float64",
             "object",
             "object_str",
             "datetime64[ns]",
@@ -36,28 +35,24 @@ class Factorize:

     def setup(self, unique, sort, dtype):
         N = 10**5
-        string_index = tm.makeStringIndex(N)
-        string_arrow = None
-        if dtype == "string[pyarrow]":
-            try:
-                string_arrow = pd.array(string_index, dtype="string[pyarrow]")
-            except ImportError:
-                raise NotImplementedError
-
-        data = {
-            "int": pd.Index(np.arange(N), dtype="int64"),
-            "uint": pd.Index(np.arange(N), dtype="uint64"),
-            "float": pd.Index(np.random.randn(N), dtype="float64"),
-            "object_str": string_index,
-            "object": pd.Index(np.arange(N), dtype="object"),
-            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
-            "datetime64[ns, tz]": pd.date_range(
-                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
-            ),
-            "Int64": pd.array(np.arange(N), dtype="Int64"),
-            "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
-            "string[pyarrow]": string_arrow,
-        }[dtype]
+
+        if dtype in ["int64", "uint64", "Int64", "object"]:
+            data = pd.Index(np.arange(N), dtype=dtype)
+        elif dtype == "float64":
+            data = pd.Index(np.random.randn(N), dtype=dtype)
+        elif dtype == "boolean":
+            data = pd.array(np.random.randint(0, 2, N), dtype=dtype)
+        elif dtype == "datetime64[ns]":
+            data = pd.date_range("2011-01-01", freq="h", periods=N)
+        elif dtype == "datetime64[ns, tz]":
+            data = pd.date_range("2011-01-01", freq="h", periods=N, tz="Asia/Tokyo")
+        elif dtype == "object_str":
+            data = tm.makeStringIndex(N)
+        elif dtype == "string[pyarrow]":
+            data = pd.array(tm.makeStringIndex(N), dtype="string[pyarrow]")
+        else:
+            raise NotImplementedError
+
         if not unique:
             data = data.repeat(5)
         self.data = data
@@ -74,9 +69,9 @@ class Duplicated:
         [True, False],
         ["first", "last", False],
         [
-            "int",
-            "uint",
-            "float",
+            "int64",
+            "uint64",
+            "float64",
             "string",
             "datetime64[ns]",
             "datetime64[ns, tz]",
@@ -88,22 +83,20 @@ class Duplicated:

     def setup(self, unique, keep, dtype):
         N = 10**5
-        data = {
-            "int": pd.Index(np.arange(N), dtype="int64"),
-            "uint": pd.Index(np.arange(N), dtype="uint64"),
-            "float": pd.Index(np.random.randn(N), dtype="float64"),
-            "string": tm.makeStringIndex(N),
-            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
-            "datetime64[ns, tz]": pd.date_range(
-                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
-            ),
-            "timestamp[ms][pyarrow]": pd.Index(
-                np.arange(N), dtype=pd.ArrowDtype(pa.timestamp("ms"))
-            ),
-            "duration[s][pyarrow]": pd.Index(
-                np.arange(N), dtype=pd.ArrowDtype(pa.duration("s"))
-            ),
-        }[dtype]
+        if dtype in ["int64", "uint64"]:
+            data = pd.Index(np.arange(N), dtype=dtype)
+        elif dtype == "float64":
+            data = pd.Index(np.random.randn(N), dtype="float64")
+        elif dtype == "string":
+            data = tm.makeStringIndex(N)
+        elif dtype == "datetime64[ns]":
+            data = pd.date_range("2011-01-01", freq="h", periods=N)
+        elif dtype == "datetime64[ns, tz]":
+            data = pd.date_range("2011-01-01", freq="h", periods=N, tz="Asia/Tokyo")
+        elif dtype in ["timestamp[ms][pyarrow]", "duration[s][pyarrow]"]:
+            data = pd.Index(np.arange(N), dtype=dtype)
+        else:
+            raise NotImplementedError
         if not unique:
             data = data.repeat(5)
         self.idx = data
@@ -181,21 +174,22 @@ class Quantile:
     params = [
         [0, 0.5, 1],
         ["linear", "nearest", "lower", "higher", "midpoint"],
-        ["float", "int", "uint"],
+        ["float64", "int64", "uint64"],
     ]
     param_names = ["quantile", "interpolation", "dtype"]

     def setup(self, quantile, interpolation, dtype):
         N = 10**5
-        data = {
-            "int": np.arange(N),
-            "uint": np.arange(N).astype(np.uint64),
-            "float": np.random.randn(N),
-        }
-        self.idx = pd.Series(data[dtype].repeat(5))
+        if dtype in ["int64", "uint64"]:
+            data = np.arange(N, dtype=dtype)
+        elif dtype == "float64":
+            data = np.random.randn(N)
+        else:
+            raise NotImplementedError
+        self.ser = pd.Series(data.repeat(5))

     def time_quantile(self, quantile, interpolation, dtype):
-        self.idx.quantile(quantile, interpolation=interpolation)
+        self.ser.quantile(quantile, interpolation=interpolation)


 class SortIntegerArray:
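The algorithms.py refactor above replaces dicts that materialised data for every dtype on each setup call with if/elif chains that build only the requested dtype (and drops the module-level pyarrow import). A minimal sketch of the same dispatch pattern; `make_data` is a hypothetical stand-in for the benchmark setup, not code from the commit:

import numpy as np
import pandas as pd

def make_data(dtype: str, N: int = 10**5):
    # Build only the container the current benchmark parameter asks for,
    # instead of constructing every dtype and indexing into a dict.
    if dtype in ("int64", "uint64", "Int64", "object"):
        return pd.Index(np.arange(N), dtype=dtype)
    if dtype == "float64":
        return pd.Index(np.random.randn(N), dtype=dtype)
    if dtype == "datetime64[ns]":
        return pd.date_range("2011-01-01", freq="h", periods=N)
    raise NotImplementedError(dtype)

idx = make_data("uint64")  # one of the new explicit dtype params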

asv_bench/benchmarks/arithmetic.py (+1, -1)

@@ -491,7 +491,7 @@ class BinaryOpsMultiIndex:
     param_names = ["func"]

     def setup(self, func):
-        array = date_range("20200101 00:00", "20200102 0:00", freq="S")
+        array = date_range("20200101 00:00", "20200102 0:00", freq="s")
        level_0_names = [str(i) for i in range(30)]

        index = pd.MultiIndex.from_product([level_0_names, array])
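This one-line change, and the similar ones in the benchmark files below, swaps uppercase frequency aliases such as "S" and "H" for their lowercase forms "s" and "h". A quick illustration, assuming a pandas version where the lowercase aliases are the preferred spelling (2.2+ deprecates the uppercase ones):

import pandas as pd

# One timestamp per second; "s" is the lowercase alias for seconds.
per_second = pd.date_range("20200101 00:00", "20200102 00:00", freq="s")
print(len(per_second))  # 86401 timestamps, both endpoints included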

asv_bench/benchmarks/array.py (+3, -3)

@@ -31,9 +31,9 @@ def time_from_float_array(self):
 class IntegerArray:
     def setup(self):
         N = 250_000
-        self.values_integer = np.array([1, 0, 1, 0] * N)
-        self.data = np.array([1, 2, 3, 4] * N, dtype="int64")
-        self.mask = np.array([False, False, True, False] * N)
+        self.values_integer = np.tile(np.array([1, 0, 1, 0]), N)
+        self.data = np.tile(np.array([1, 2, 3, 4], dtype="int64"), N)
+        self.mask = np.tile(np.array([False, False, True, False]), N)

     def time_constructor(self):
         pd.arrays.IntegerArray(self.data, self.mask)
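The setup above switches from Python list multiplication (which builds a million-element Python list that NumPy then converts) to np.tile, which repeats the small array directly in NumPy. The two forms produce identical arrays; a quick check, illustrative only:

import numpy as np

N = 250_000
via_list = np.array([1, 2, 3, 4] * N, dtype="int64")          # old style
via_tile = np.tile(np.array([1, 2, 3, 4], dtype="int64"), N)  # new style
assert np.array_equal(via_list, via_tile)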

asv_bench/benchmarks/categoricals.py (+4, -6)

@@ -260,18 +260,16 @@ class CategoricalSlicing:
     def setup(self, index):
         N = 10**6
         categories = ["a", "b", "c"]
-        values = [0] * N + [1] * N + [2] * N
         if index == "monotonic_incr":
-            self.data = pd.Categorical.from_codes(values, categories=categories)
+            codes = np.repeat([0, 1, 2], N)
         elif index == "monotonic_decr":
-            self.data = pd.Categorical.from_codes(
-                list(reversed(values)), categories=categories
-            )
+            codes = np.repeat([2, 1, 0], N)
         elif index == "non_monotonic":
-            self.data = pd.Categorical.from_codes([0, 1, 2] * N, categories=categories)
+            codes = np.tile([0, 1, 2], N)
         else:
             raise ValueError(f"Invalid index param: {index}")

+        self.data = pd.Categorical.from_codes(codes, categories=categories)
         self.scalar = 10000
         self.list = list(range(10000))
         self.cat_scalar = "b"
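As in the change above, np.repeat and np.tile reproduce the codes that the old list arithmetic built, without first constructing a 3-million-element Python list. For a small N, illustrative only:

import numpy as np

N = 4  # the benchmark uses 10**6
assert np.array_equal(np.repeat([0, 1, 2], N), np.array([0] * N + [1] * N + [2] * N))
assert np.array_equal(np.tile([0, 1, 2], N), np.array([0, 1, 2] * N))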

asv_bench/benchmarks/frame_methods.py (+5, -4)

@@ -439,9 +439,9 @@ def setup(self, inplace, dtype):
         N, M = 10000, 100
         if dtype in ("datetime64[ns]", "datetime64[ns, tz]", "timedelta64[ns]"):
             data = {
-                "datetime64[ns]": date_range("2011-01-01", freq="H", periods=N),
+                "datetime64[ns]": date_range("2011-01-01", freq="h", periods=N),
                 "datetime64[ns, tz]": date_range(
-                    "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+                    "2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
                 ),
                 "timedelta64[ns]": timedelta_range(start="1 day", periods=N, freq="1D"),
             }
@@ -640,7 +640,8 @@ def time_frame_nunique(self):

 class SeriesNuniqueWithNan:
     def setup(self):
-        self.ser = Series(100000 * (100 * [np.nan] + list(range(100)))).astype(float)
+        values = 100 * [np.nan] + list(range(100))
+        self.ser = Series(np.tile(values, 10000), dtype=float)

     def time_series_nunique_nan(self):
         self.ser.nunique()
@@ -649,7 +650,7 @@ def time_series_nunique_nan(self):
 class Duplicated:
     def setup(self):
         n = 1 << 20
-        t = date_range("2015-01-01", freq="S", periods=(n // 64))
+        t = date_range("2015-01-01", freq="s", periods=(n // 64))
         xs = np.random.randn(n // 64).round(2)
         self.df = DataFrame(
             {

asv_bench/benchmarks/gil.py (+11, -9)

@@ -212,7 +212,7 @@ def run(dti):
     def time_datetime_to_period(self):
         @test_parallel(num_threads=2)
         def run(dti):
-            dti.to_period("S")
+            dti.to_period("s")

         run(self.dti)

@@ -272,18 +272,20 @@ class ParallelReadCSV(BaseIO):
     def setup(self, dtype):
         rows = 10000
         cols = 50
-        data = {
-            "float": DataFrame(np.random.randn(rows, cols)),
-            "datetime": DataFrame(
+        if dtype == "float":
+            df = DataFrame(np.random.randn(rows, cols))
+        elif dtype == "datetime":
+            df = DataFrame(
                 np.random.randn(rows, cols), index=date_range("1/1/2000", periods=rows)
-            ),
-            "object": DataFrame(
+            )
+        elif dtype == "object":
+            df = DataFrame(
                 "foo", index=range(rows), columns=["object%03d" for _ in range(5)]
-            ),
-        }
+            )
+        else:
+            raise NotImplementedError

         self.fname = f"__test_{dtype}__.csv"
-        df = data[dtype]
         df.to_csv(self.fname)

         @test_parallel(num_threads=2)
