pandas-dev
diff --git a/‎.github/workflows/broken-linkcheck.yml
+38 b/‎.github/workflows/broken-linkcheck.yml
+38
diff --git a/‎.github/workflows/unit-tests.yml
+8-4 b/‎.github/workflows/unit-tests.yml
+8-4
diff --git a/‎.github/workflows/wheels.yml
+1-1 b/‎.github/workflows/wheels.yml
+1-1
diff --git a/‎.pre-commit-config.yaml
+9-9 b/‎.pre-commit-config.yaml
+9-9
diff --git a/‎asv_bench/benchmarks/algorithms.py
+4-4 b/‎asv_bench/benchmarks/algorithms.py
+4-4
diff --git a/‎asv_bench/benchmarks/arithmetic.py
+1-1 b/‎asv_bench/benchmarks/arithmetic.py
+1-1
diff --git a/‎asv_bench/benchmarks/frame_methods.py
+3-3 b/‎asv_bench/benchmarks/frame_methods.py
+3-3
diff --git a/‎asv_bench/benchmarks/gil.py
+1-1 b/‎asv_bench/benchmarks/gil.py
+1-1
diff --git a/‎asv_bench/benchmarks/groupby.py
+1-1 b/‎asv_bench/benchmarks/groupby.py
+1-1
diff --git a/‎asv_bench/benchmarks/indexing.py
+2-2 b/‎asv_bench/benchmarks/indexing.py
+2-2
diff --git a/‎asv_bench/benchmarks/inference.py
+2-2 b/‎asv_bench/benchmarks/inference.py
+2-2
diff --git a/‎asv_bench/benchmarks/io/csv.py
+6-6 b/‎asv_bench/benchmarks/io/csv.py
+6-6
diff --git a/‎asv_bench/benchmarks/io/excel.py
+1-1 b/‎asv_bench/benchmarks/io/excel.py
+1-1
diff --git a/‎asv_bench/benchmarks/io/hdf.py
+1-1 b/‎asv_bench/benchmarks/io/hdf.py
+1-1
diff --git a/‎asv_bench/benchmarks/io/json.py
+5-5 b/‎asv_bench/benchmarks/io/json.py
+5-5
@@ -0,0 +1,38 @@
+name: Linkcheck
+on:
+  pull_request:
+    paths:
+      - '.github/workflows/broken-linkcheck.yml'
+      - 'doc/make.py'
+  schedule:
+    # Fires at 00:00 on day 1 of every month
+    - cron: "0 0 1 * *"
+jobs:
+  linkcheck:
+    defaults:
+      run:
+        shell: bash -el {0}
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Conda
+        uses: ./.github/actions/setup-conda
+
+      - name: Build Pandas
+        uses: ./.github/actions/build_pandas
+
+      - name: Run linkcheck script
+        run: |
+          set -o pipefail
+          python make.py linkcheck | tee linkcheck.txt
+        working-directory: ./doc
+
+      - name: Display broken links
+        run: grep broken linkcheck.txt
+        if: failure()
+        working-directory: ./doc
@@ -69,6 +69,10 @@ jobs:
             env_file: actions-311.yaml
             pattern: "not slow and not network and not single_cpu"
             pandas_copy_on_write: "1"
+          - name: "Copy-on-Write 3.11 (warnings)"
+            env_file: actions-311.yaml
+            pattern: "not slow and not network and not single_cpu"
+            pandas_copy_on_write: "warn"
           - name: "Pypy"
             env_file: actions-pypy-39.yaml
             pattern: "not slow and not network and not single_cpu"
@@ -94,7 +98,7 @@ jobs:
       PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
     concurrency:
       # https://github.community/t/concurrecy-not-work-for-push/183068/7
-      group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}
+      group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
       cancel-in-progress: true
 
     services:
@@ -236,7 +240,8 @@ jobs:
           . ~/virtualenvs/pandas-dev/bin/activate
           python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1
           python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true"
-          python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
+          # Quote the version specifiers: an unquoted ">" is parsed by the shell as an output redirect, dropping the pin.
+          python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz "pytest>=7.3.2" "pytest-xdist>=2.2.0" "hypothesis>=6.46.1"
           python -m pip install --no-cache-dir --no-build-isolation -e .
           python -m pip list --no-cache-dir
           export PANDAS_CI=1
@@ -274,7 +278,8 @@ jobs:
           /opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
           . ~/virtualenvs/pandas-dev/bin/activate
           python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
-          python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
+          # Quote the version specifiers: an unquoted ">" is parsed by the shell as an output redirect, dropping the pin.
+          python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz "pytest>=7.3.2" "pytest-xdist>=2.2.0" "hypothesis>=6.46.1"
           python -m pip install --no-cache-dir --no-build-isolation -e .
           python -m pip list --no-cache-dir
 
@@ -347,7 +351,8 @@ jobs:
           python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
           python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
           python -m pip install versioneer[toml]
-          python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
+          # Quote the version specifiers: an unquoted ">" is parsed by the shell as an output redirect, dropping the pin.
+          python -m pip install python-dateutil pytz tzdata cython "hypothesis>=6.46.1" "pytest>=7.3.2" "pytest-xdist>=2.2.0" pytest-cov
           python -m pip install -ve . --no-build-isolation --no-index --no-deps
           python -m pip list
 
 
@@ -181,7 +181,7 @@ jobs:
         shell: pwsh
         run: |
           $TST_CMD = @"
-          python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17;
+          python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0;
           python -m pip install `$(Get-Item pandas\wheelhouse\*.whl);
           python -c `'import pandas as pd; pd.test(extra_args=[\"`\"--no-strict-data-files`\"\", \"`\"-m not clipboard and not single_cpu and not slow and not network and not db`\"\"])`';
           "@
 
@@ -20,11 +20,11 @@ ci:
 repos:
 -   repo: https://github.com/hauntsaninja/black-pre-commit-mirror
     # black compiled with mypyc
-    rev: 23.9.1
+    rev: 23.10.1
     hooks:
       - id: black
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.0.291
+    rev: v0.1.4
     hooks:
     -   id: ruff
         args: [--exit-non-zero-on-fix]
@@ -34,14 +34,14 @@ repos:
         alias: ruff-selected-autofixes
         args: [--select, "ANN001,ANN204", --fix-only, --exit-non-zero-on-fix]
 -   repo: https://github.com/jendrikseipp/vulture
-    rev: 'v2.9.1'
+    rev: 'v2.10'
     hooks:
       - id: vulture
         entry: python scripts/run_vulture.py
         pass_filenames: true
         require_serial: false
 -   repo: https://github.com/codespell-project/codespell
-    rev: v2.2.5
+    rev: v2.2.6
     hooks:
     -   id: codespell
         types_or: [python, rst, markdown, cython, c]
@@ -52,7 +52,7 @@ repos:
     -   id: cython-lint
     -   id: double-quote-cython-strings
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
     -   id: check-ast
     -   id: check-case-conflict
@@ -71,7 +71,7 @@ repos:
         args: [--remove]
     -   id: trailing-whitespace
 -   repo: https://github.com/pylint-dev/pylint
-    rev: v3.0.0b0
+    rev: v3.0.1
     hooks:
     -   id: pylint
         stages: [manual]
@@ -94,7 +94,7 @@ repos:
     hooks:
     -   id: isort
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v3.13.0
+    rev: v3.15.0
     hooks:
     -   id: pyupgrade
         args: [--py39-plus]
@@ -111,11 +111,11 @@ repos:
         types: [text]  # overwrite types: [rst]
         types_or: [python, rst]
 -   repo: https://github.com/sphinx-contrib/sphinx-lint
-    rev: v0.6.8
+    rev: v0.8.1
     hooks:
     - id: sphinx-lint
 -   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: ea59a72
+    rev: v17.0.4
     hooks:
     - id: clang-format
       files: ^pandas/_libs/src|^pandas/_libs/include
 
@@ -50,9 +50,9 @@ def setup(self, unique, sort, dtype):
             "float": pd.Index(np.random.randn(N), dtype="float64"),
             "object_str": string_index,
             "object": pd.Index(np.arange(N), dtype="object"),
-            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
+            "datetime64[ns]": pd.date_range("2011-01-01", freq="h", periods=N),
             "datetime64[ns, tz]": pd.date_range(
-                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+                "2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
             ),
             "Int64": pd.array(np.arange(N), dtype="Int64"),
             "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
@@ -93,9 +93,9 @@ def setup(self, unique, keep, dtype):
             "uint": pd.Index(np.arange(N), dtype="uint64"),
             "float": pd.Index(np.random.randn(N), dtype="float64"),
             "string": tm.makeStringIndex(N),
-            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
+            "datetime64[ns]": pd.date_range("2011-01-01", freq="h", periods=N),
             "datetime64[ns, tz]": pd.date_range(
-                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+                "2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
             ),
             "timestamp[ms][pyarrow]": pd.Index(
                 np.arange(N), dtype=pd.ArrowDtype(pa.timestamp("ms"))
 
@@ -491,7 +491,7 @@ class BinaryOpsMultiIndex:
     param_names = ["func"]
 
     def setup(self, func):
-        array = date_range("20200101 00:00", "20200102 0:00", freq="S")
+        array = date_range("20200101 00:00", "20200102 0:00", freq="s")
         level_0_names = [str(i) for i in range(30)]
 
         index = pd.MultiIndex.from_product([level_0_names, array])
 
@@ -439,9 +439,9 @@ def setup(self, inplace, dtype):
         N, M = 10000, 100
         if dtype in ("datetime64[ns]", "datetime64[ns, tz]", "timedelta64[ns]"):
             data = {
-                "datetime64[ns]": date_range("2011-01-01", freq="H", periods=N),
+                "datetime64[ns]": date_range("2011-01-01", freq="h", periods=N),
                 "datetime64[ns, tz]": date_range(
-                    "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+                    "2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
                 ),
                 "timedelta64[ns]": timedelta_range(start="1 day", periods=N, freq="1D"),
             }
@@ -649,7 +649,7 @@ def time_series_nunique_nan(self):
 class Duplicated:
     def setup(self):
         n = 1 << 20
-        t = date_range("2015-01-01", freq="S", periods=(n // 64))
+        t = date_range("2015-01-01", freq="s", periods=(n // 64))
         xs = np.random.randn(n // 64).round(2)
         self.df = DataFrame(
             {
 
@@ -212,7 +212,7 @@ def run(dti):
     def time_datetime_to_period(self):
         @test_parallel(num_threads=2)
         def run(dti):
-            dti.to_period("S")
+            dti.to_period("s")
 
         run(self.dti)
 
 
@@ -238,7 +238,7 @@ def time_series_nth(self, dtype):
 
 class DateAttributes:
     def setup(self):
-        rng = date_range("1/1/2000", "12/31/2005", freq="H")
+        rng = date_range("1/1/2000", "12/31/2005", freq="h")
         self.year, self.month, self.day = rng.year, rng.month, rng.day
         self.ts = Series(np.random.randn(len(rng)), index=rng)
 
 
@@ -232,7 +232,7 @@ def setup(self, index):
         N = 100000
         indexes = {
             "int": Index(np.arange(N), dtype=np.int64),
-            "datetime": date_range("2011-01-01", freq="S", periods=N),
+            "datetime": date_range("2011-01-01", freq="s", periods=N),
         }
         index = indexes[index]
         self.s = Series(np.random.rand(N), index=index)
@@ -465,7 +465,7 @@ def time_loc_row(self, unique_cols):
 class AssignTimeseriesIndex:
     def setup(self):
         N = 100000
-        idx = date_range("1/1/2000", periods=N, freq="H")
+        idx = date_range("1/1/2000", periods=N, freq="h")
         self.df = DataFrame(np.random.randn(N, 1), columns=["A"], index=idx)
 
     def time_frame_assign_timeseries_index(self):
 
@@ -164,7 +164,7 @@ def time_unique_date_strings(self, cache, count):
 
 class ToDatetimeISO8601:
     def setup(self):
-        rng = date_range(start="1/1/2000", periods=20000, freq="H")
+        rng = date_range(start="1/1/2000", periods=20000, freq="h")
         self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
         self.strings_nosep = rng.strftime("%Y%m%d %H:%M:%S").tolist()
         self.strings_tz_space = [
@@ -276,7 +276,7 @@ def time_dup_string_tzoffset_dates(self, cache):
 # GH 43901
 class ToDatetimeInferDatetimeFormat:
     def setup(self):
-        rng = date_range(start="1/1/2000", periods=100000, freq="H")
+        rng = date_range(start="1/1/2000", periods=100000, freq="h")
         self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
 
     def time_infer_datetime_format(self):
 
@@ -89,7 +89,7 @@ class ToCSVDatetimeIndex(BaseIO):
     fname = "__test__.csv"
 
     def setup(self):
-        rng = date_range("2000", periods=100_000, freq="S")
+        rng = date_range("2000", periods=100_000, freq="s")
         self.data = DataFrame({"a": 1}, index=rng)
 
     def time_frame_date_formatting_index(self):
@@ -102,15 +102,15 @@ def time_frame_date_no_format_index(self):
 class ToCSVPeriod(BaseIO):
     fname = "__test__.csv"
 
-    params = ([1000, 10000], ["D", "H"])
+    params = ([1000, 10000], ["D", "h"])
     param_names = ["nobs", "freq"]
 
     def setup(self, nobs, freq):
         rng = period_range(start="2000-01-01", periods=nobs, freq=freq)
         self.data = DataFrame(rng)
         if freq == "D":
             self.default_fmt = "%Y-%m-%d"
-        elif freq == "H":
+        elif freq == "h":
             self.default_fmt = "%Y-%m-%d %H:00"
 
     def time_frame_period_formatting_default(self, nobs, freq):
@@ -130,15 +130,15 @@ def time_frame_period_formatting(self, nobs, freq):
 class ToCSVPeriodIndex(BaseIO):
     fname = "__test__.csv"
 
-    params = ([1000, 10000], ["D", "H"])
+    params = ([1000, 10000], ["D", "h"])
     param_names = ["nobs", "freq"]
 
     def setup(self, nobs, freq):
         rng = period_range(start="2000-01-01", periods=nobs, freq=freq)
         self.data = DataFrame({"a": 1}, index=rng)
         if freq == "D":
             self.default_fmt = "%Y-%m-%d"
-        elif freq == "H":
+        elif freq == "h":
             self.default_fmt = "%Y-%m-%d %H:00"
 
     def time_frame_period_formatting_index(self, nobs, freq):
@@ -253,7 +253,7 @@ class ReadCSVConcatDatetime(StringIORewind):
     iso8601 = "%Y-%m-%d %H:%M:%S"
 
     def setup(self):
-        rng = date_range("1/1/2000", periods=50000, freq="S")
+        rng = date_range("1/1/2000", periods=50000, freq="s")
         self.StringIO_input = StringIO("\n".join(rng.strftime(self.iso8601).tolist()))
 
     def time_read_csv(self):
 
@@ -25,7 +25,7 @@ def _generate_dataframe():
     df = DataFrame(
         np.random.randn(N, C),
         columns=[f"float{i}" for i in range(C)],
-        index=date_range("20000101", periods=N, freq="H"),
+        index=date_range("20000101", periods=N, freq="h"),
     )
     df["object"] = tm.makeStringIndex(N)
     return df
 
@@ -122,7 +122,7 @@ def setup(self, format):
         self.df = DataFrame(
             np.random.randn(N, C),
             columns=[f"float{i}" for i in range(C)],
-            index=date_range("20000101", periods=N, freq="H"),
+            index=date_range("20000101", periods=N, freq="h"),
         )
         self.df["object"] = tm.makeStringIndex(N)
         self.df.to_hdf(self.fname, "df", format=format)
 
@@ -26,7 +26,7 @@ def setup(self, orient, index):
         N = 100000
         indexes = {
             "int": np.arange(N),
-            "datetime": date_range("20000101", periods=N, freq="H"),
+            "datetime": date_range("20000101", periods=N, freq="h"),
         }
         df = DataFrame(
             np.random.randn(N, 5),
@@ -48,7 +48,7 @@ def setup(self, index):
         N = 100000
         indexes = {
             "int": np.arange(N),
-            "datetime": date_range("20000101", periods=N, freq="H"),
+            "datetime": date_range("20000101", periods=N, freq="h"),
         }
         df = DataFrame(
             np.random.randn(N, 5),
@@ -108,7 +108,7 @@ class ToJSON(BaseIO):
     def setup(self, orient, frame):
         N = 10**5
         ncols = 5
-        index = date_range("20000101", periods=N, freq="H")
+        index = date_range("20000101", periods=N, freq="h")
         timedeltas = timedelta_range(start=1, periods=N, freq="s")
         datetimes = date_range(start=1, periods=N, freq="s")
         ints = np.random.randint(100000000, size=N)
@@ -191,7 +191,7 @@ class ToJSONISO(BaseIO):
 
     def setup(self, orient):
         N = 10**5
-        index = date_range("20000101", periods=N, freq="H")
+        index = date_range("20000101", periods=N, freq="h")
         timedeltas = timedelta_range(start=1, periods=N, freq="s")
         datetimes = date_range(start=1, periods=N, freq="s")
         self.df = DataFrame(
@@ -214,7 +214,7 @@ class ToJSONLines(BaseIO):
     def setup(self):
         N = 10**5
         ncols = 5
-        index = date_range("20000101", periods=N, freq="H")
+        index = date_range("20000101", periods=N, freq="h")
         timedeltas = timedelta_range(start=1, periods=N, freq="s")
         datetimes = date_range(start=1, periods=N, freq="s")
         ints = np.random.randint(100000000, size=N)
Original file line number	Diff line number	Diff line change
`@@ -25,7 +25,7 @@ def _generate_dataframe():`
`25`	`25`	`df = DataFrame(`
`26`	`26`	`np.random.randn(N, C),`
`27`	`27`	`columns=[f"float{i}" for i in range(C)],`
`28`		`- index=date_range("20000101", periods=N, freq="H"),`
	`28`	`+ index=date_range("20000101", periods=N, freq="h"),`
`29`	`29`	`)`
`30`	`30`	`df["object"] = tm.makeStringIndex(N)`
`31`	`31`	`return df`
Original file line number	Diff line number	Diff line change
`@@ -122,7 +122,7 @@ def setup(self, format):`
`122`	`122`	`self.df = DataFrame(`
`123`	`123`	`np.random.randn(N, C),`
`124`	`124`	`columns=[f"float{i}" for i in range(C)],`
`125`		`- index=date_range("20000101", periods=N, freq="H"),`
	`125`	`+ index=date_range("20000101", periods=N, freq="h"),`
`126`	`126`	`)`
`127`	`127`	`self.df["object"] = tm.makeStringIndex(N)`
`128`	`128`	`self.df.to_hdf(self.fname, "df", format=format)`