Skip to content

Commit 88e44b3

Browse files
committed
Merge branch 'main' into enh-stata-non-nano
2 parents 38fad02 + 76d28c7 commit 88e44b3

File tree

367 files changed

+9404
-4703
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

367 files changed

+9404
-4703
lines changed
+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
name: Linkcheck
2+
on:
3+
schedule:
4+
# Run monthly on the 1st day of the month
5+
- cron: '0 0 1 * *'
6+
pull_request:
7+
paths:
8+
- ".github/workflows/broken-linkcheck.yml"
9+
- "doc/make.py"
10+
jobs:
11+
linkcheck:
12+
runs-on: ubuntu-latest
13+
defaults:
14+
run:
15+
shell: bash -el {0}
16+
17+
steps:
18+
- name: Checkout
19+
uses: actions/checkout@v4
20+
with:
21+
fetch-depth: 0
22+
23+
- name: Set up Conda
24+
uses: ./.github/actions/setup-conda
25+
26+
- name: Build Pandas
27+
uses: ./.github/actions/build_pandas
28+
29+
- name: Run linkcheck script
30+
working-directory: ./doc
31+
run: |
32+
set -o pipefail
33+
python make.py linkcheck | tee linkcheck.txt
34+
35+
- name: Display broken links
36+
if: failure()
37+
working-directory: ./doc
38+
run: grep broken linkcheck.txt

.github/workflows/unit-tests.yml

+8-4
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ jobs:
6969
env_file: actions-311.yaml
7070
pattern: "not slow and not network and not single_cpu"
7171
pandas_copy_on_write: "1"
72+
- name: "Copy-on-Write 3.11 (warnings)"
73+
env_file: actions-311.yaml
74+
pattern: "not slow and not network and not single_cpu"
75+
pandas_copy_on_write: "warn"
7276
- name: "Pypy"
7377
env_file: actions-pypy-39.yaml
7478
pattern: "not slow and not network and not single_cpu"
@@ -94,7 +98,7 @@ jobs:
9498
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
9599
concurrency:
96100
# https://github.community/t/concurrecy-not-work-for-push/183068/7
97-
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}
101+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
98102
cancel-in-progress: true
99103

100104
services:
@@ -236,7 +240,7 @@ jobs:
236240
. ~/virtualenvs/pandas-dev/bin/activate
237241
python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1
238242
python -m pip install numpy --config-settings=setup-args="-Dallow-noblas=true"
239-
python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
243+
python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
240244
python -m pip install --no-cache-dir --no-build-isolation -e .
241245
python -m pip list --no-cache-dir
242246
export PANDAS_CI=1
@@ -274,7 +278,7 @@ jobs:
274278
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
275279
. ~/virtualenvs/pandas-dev/bin/activate
276280
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
277-
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
281+
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.3.2 pytest-xdist>=2.2.0 hypothesis>=6.46.1
278282
python -m pip install --no-cache-dir --no-build-isolation -e .
279283
python -m pip list --no-cache-dir
280284
@@ -347,7 +351,7 @@ jobs:
347351
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
348352
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
349353
python -m pip install versioneer[toml]
350-
python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
354+
python -m pip install python-dateutil pytz tzdata cython hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-cov
351355
python -m pip install -ve . --no-build-isolation --no-index --no-deps
352356
python -m pip list
353357

.github/workflows/wheels.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ jobs:
181181
shell: pwsh
182182
run: |
183183
$TST_CMD = @"
184-
python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17;
184+
python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0;
185185
python -m pip install `$(Get-Item pandas\wheelhouse\*.whl);
186186
python -c `'import pandas as pd; pd.test(extra_args=[\"`\"--no-strict-data-files`\"\", \"`\"-m not clipboard and not single_cpu and not slow and not network and not db`\"\"])`';
187187
"@

.pre-commit-config.yaml

+9-9
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ ci:
2020
repos:
2121
- repo: https://github.com/hauntsaninja/black-pre-commit-mirror
2222
# black compiled with mypyc
23-
rev: 23.9.1
23+
rev: 23.10.1
2424
hooks:
2525
- id: black
2626
- repo: https://github.com/astral-sh/ruff-pre-commit
27-
rev: v0.0.291
27+
rev: v0.1.4
2828
hooks:
2929
- id: ruff
3030
args: [--exit-non-zero-on-fix]
@@ -34,14 +34,14 @@ repos:
3434
alias: ruff-selected-autofixes
3535
args: [--select, "ANN001,ANN204", --fix-only, --exit-non-zero-on-fix]
3636
- repo: https://github.com/jendrikseipp/vulture
37-
rev: 'v2.9.1'
37+
rev: 'v2.10'
3838
hooks:
3939
- id: vulture
4040
entry: python scripts/run_vulture.py
4141
pass_filenames: true
4242
require_serial: false
4343
- repo: https://github.com/codespell-project/codespell
44-
rev: v2.2.5
44+
rev: v2.2.6
4545
hooks:
4646
- id: codespell
4747
types_or: [python, rst, markdown, cython, c]
@@ -52,7 +52,7 @@ repos:
5252
- id: cython-lint
5353
- id: double-quote-cython-strings
5454
- repo: https://github.com/pre-commit/pre-commit-hooks
55-
rev: v4.4.0
55+
rev: v4.5.0
5656
hooks:
5757
- id: check-ast
5858
- id: check-case-conflict
@@ -71,7 +71,7 @@ repos:
7171
args: [--remove]
7272
- id: trailing-whitespace
7373
- repo: https://github.com/pylint-dev/pylint
74-
rev: v3.0.0b0
74+
rev: v3.0.1
7575
hooks:
7676
- id: pylint
7777
stages: [manual]
@@ -94,7 +94,7 @@ repos:
9494
hooks:
9595
- id: isort
9696
- repo: https://github.com/asottile/pyupgrade
97-
rev: v3.13.0
97+
rev: v3.15.0
9898
hooks:
9999
- id: pyupgrade
100100
args: [--py39-plus]
@@ -111,11 +111,11 @@ repos:
111111
types: [text] # overwrite types: [rst]
112112
types_or: [python, rst]
113113
- repo: https://github.com/sphinx-contrib/sphinx-lint
114-
rev: v0.6.8
114+
rev: v0.8.1
115115
hooks:
116116
- id: sphinx-lint
117117
- repo: https://github.com/pre-commit/mirrors-clang-format
118-
rev: ea59a72
118+
rev: v17.0.4
119119
hooks:
120120
- id: clang-format
121121
files: ^pandas/_libs/src|^pandas/_libs/include

asv_bench/benchmarks/algorithms.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ def setup(self, unique, sort, dtype):
5050
"float": pd.Index(np.random.randn(N), dtype="float64"),
5151
"object_str": string_index,
5252
"object": pd.Index(np.arange(N), dtype="object"),
53-
"datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
53+
"datetime64[ns]": pd.date_range("2011-01-01", freq="h", periods=N),
5454
"datetime64[ns, tz]": pd.date_range(
55-
"2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
55+
"2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
5656
),
5757
"Int64": pd.array(np.arange(N), dtype="Int64"),
5858
"boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
@@ -93,9 +93,9 @@ def setup(self, unique, keep, dtype):
9393
"uint": pd.Index(np.arange(N), dtype="uint64"),
9494
"float": pd.Index(np.random.randn(N), dtype="float64"),
9595
"string": tm.makeStringIndex(N),
96-
"datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
96+
"datetime64[ns]": pd.date_range("2011-01-01", freq="h", periods=N),
9797
"datetime64[ns, tz]": pd.date_range(
98-
"2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
98+
"2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
9999
),
100100
"timestamp[ms][pyarrow]": pd.Index(
101101
np.arange(N), dtype=pd.ArrowDtype(pa.timestamp("ms"))

asv_bench/benchmarks/arithmetic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ class BinaryOpsMultiIndex:
491491
param_names = ["func"]
492492

493493
def setup(self, func):
494-
array = date_range("20200101 00:00", "20200102 0:00", freq="S")
494+
array = date_range("20200101 00:00", "20200102 0:00", freq="s")
495495
level_0_names = [str(i) for i in range(30)]
496496

497497
index = pd.MultiIndex.from_product([level_0_names, array])

asv_bench/benchmarks/frame_methods.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -439,9 +439,9 @@ def setup(self, inplace, dtype):
439439
N, M = 10000, 100
440440
if dtype in ("datetime64[ns]", "datetime64[ns, tz]", "timedelta64[ns]"):
441441
data = {
442-
"datetime64[ns]": date_range("2011-01-01", freq="H", periods=N),
442+
"datetime64[ns]": date_range("2011-01-01", freq="h", periods=N),
443443
"datetime64[ns, tz]": date_range(
444-
"2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
444+
"2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
445445
),
446446
"timedelta64[ns]": timedelta_range(start="1 day", periods=N, freq="1D"),
447447
}
@@ -649,7 +649,7 @@ def time_series_nunique_nan(self):
649649
class Duplicated:
650650
def setup(self):
651651
n = 1 << 20
652-
t = date_range("2015-01-01", freq="S", periods=(n // 64))
652+
t = date_range("2015-01-01", freq="s", periods=(n // 64))
653653
xs = np.random.randn(n // 64).round(2)
654654
self.df = DataFrame(
655655
{

asv_bench/benchmarks/gil.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ def run(dti):
212212
def time_datetime_to_period(self):
213213
@test_parallel(num_threads=2)
214214
def run(dti):
215-
dti.to_period("S")
215+
dti.to_period("s")
216216

217217
run(self.dti)
218218

asv_bench/benchmarks/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ def time_series_nth(self, dtype):
238238

239239
class DateAttributes:
240240
def setup(self):
241-
rng = date_range("1/1/2000", "12/31/2005", freq="H")
241+
rng = date_range("1/1/2000", "12/31/2005", freq="h")
242242
self.year, self.month, self.day = rng.year, rng.month, rng.day
243243
self.ts = Series(np.random.randn(len(rng)), index=rng)
244244

asv_bench/benchmarks/indexing.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ def setup(self, index):
232232
N = 100000
233233
indexes = {
234234
"int": Index(np.arange(N), dtype=np.int64),
235-
"datetime": date_range("2011-01-01", freq="S", periods=N),
235+
"datetime": date_range("2011-01-01", freq="s", periods=N),
236236
}
237237
index = indexes[index]
238238
self.s = Series(np.random.rand(N), index=index)
@@ -465,7 +465,7 @@ def time_loc_row(self, unique_cols):
465465
class AssignTimeseriesIndex:
466466
def setup(self):
467467
N = 100000
468-
idx = date_range("1/1/2000", periods=N, freq="H")
468+
idx = date_range("1/1/2000", periods=N, freq="h")
469469
self.df = DataFrame(np.random.randn(N, 1), columns=["A"], index=idx)
470470

471471
def time_frame_assign_timeseries_index(self):

asv_bench/benchmarks/inference.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def time_unique_date_strings(self, cache, count):
164164

165165
class ToDatetimeISO8601:
166166
def setup(self):
167-
rng = date_range(start="1/1/2000", periods=20000, freq="H")
167+
rng = date_range(start="1/1/2000", periods=20000, freq="h")
168168
self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
169169
self.strings_nosep = rng.strftime("%Y%m%d %H:%M:%S").tolist()
170170
self.strings_tz_space = [
@@ -276,7 +276,7 @@ def time_dup_string_tzoffset_dates(self, cache):
276276
# GH 43901
277277
class ToDatetimeInferDatetimeFormat:
278278
def setup(self):
279-
rng = date_range(start="1/1/2000", periods=100000, freq="H")
279+
rng = date_range(start="1/1/2000", periods=100000, freq="h")
280280
self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
281281

282282
def time_infer_datetime_format(self):

asv_bench/benchmarks/io/csv.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ class ToCSVDatetimeIndex(BaseIO):
8989
fname = "__test__.csv"
9090

9191
def setup(self):
92-
rng = date_range("2000", periods=100_000, freq="S")
92+
rng = date_range("2000", periods=100_000, freq="s")
9393
self.data = DataFrame({"a": 1}, index=rng)
9494

9595
def time_frame_date_formatting_index(self):
@@ -102,15 +102,15 @@ def time_frame_date_no_format_index(self):
102102
class ToCSVPeriod(BaseIO):
103103
fname = "__test__.csv"
104104

105-
params = ([1000, 10000], ["D", "H"])
105+
params = ([1000, 10000], ["D", "h"])
106106
param_names = ["nobs", "freq"]
107107

108108
def setup(self, nobs, freq):
109109
rng = period_range(start="2000-01-01", periods=nobs, freq=freq)
110110
self.data = DataFrame(rng)
111111
if freq == "D":
112112
self.default_fmt = "%Y-%m-%d"
113-
elif freq == "H":
113+
elif freq == "h":
114114
self.default_fmt = "%Y-%m-%d %H:00"
115115

116116
def time_frame_period_formatting_default(self, nobs, freq):
@@ -130,15 +130,15 @@ def time_frame_period_formatting(self, nobs, freq):
130130
class ToCSVPeriodIndex(BaseIO):
131131
fname = "__test__.csv"
132132

133-
params = ([1000, 10000], ["D", "H"])
133+
params = ([1000, 10000], ["D", "h"])
134134
param_names = ["nobs", "freq"]
135135

136136
def setup(self, nobs, freq):
137137
rng = period_range(start="2000-01-01", periods=nobs, freq=freq)
138138
self.data = DataFrame({"a": 1}, index=rng)
139139
if freq == "D":
140140
self.default_fmt = "%Y-%m-%d"
141-
elif freq == "H":
141+
elif freq == "h":
142142
self.default_fmt = "%Y-%m-%d %H:00"
143143

144144
def time_frame_period_formatting_index(self, nobs, freq):
@@ -253,7 +253,7 @@ class ReadCSVConcatDatetime(StringIORewind):
253253
iso8601 = "%Y-%m-%d %H:%M:%S"
254254

255255
def setup(self):
256-
rng = date_range("1/1/2000", periods=50000, freq="S")
256+
rng = date_range("1/1/2000", periods=50000, freq="s")
257257
self.StringIO_input = StringIO("\n".join(rng.strftime(self.iso8601).tolist()))
258258

259259
def time_read_csv(self):

asv_bench/benchmarks/io/excel.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def _generate_dataframe():
2525
df = DataFrame(
2626
np.random.randn(N, C),
2727
columns=[f"float{i}" for i in range(C)],
28-
index=date_range("20000101", periods=N, freq="H"),
28+
index=date_range("20000101", periods=N, freq="h"),
2929
)
3030
df["object"] = tm.makeStringIndex(N)
3131
return df

asv_bench/benchmarks/io/hdf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def setup(self, format):
122122
self.df = DataFrame(
123123
np.random.randn(N, C),
124124
columns=[f"float{i}" for i in range(C)],
125-
index=date_range("20000101", periods=N, freq="H"),
125+
index=date_range("20000101", periods=N, freq="h"),
126126
)
127127
self.df["object"] = tm.makeStringIndex(N)
128128
self.df.to_hdf(self.fname, "df", format=format)

asv_bench/benchmarks/io/json.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def setup(self, orient, index):
2626
N = 100000
2727
indexes = {
2828
"int": np.arange(N),
29-
"datetime": date_range("20000101", periods=N, freq="H"),
29+
"datetime": date_range("20000101", periods=N, freq="h"),
3030
}
3131
df = DataFrame(
3232
np.random.randn(N, 5),
@@ -48,7 +48,7 @@ def setup(self, index):
4848
N = 100000
4949
indexes = {
5050
"int": np.arange(N),
51-
"datetime": date_range("20000101", periods=N, freq="H"),
51+
"datetime": date_range("20000101", periods=N, freq="h"),
5252
}
5353
df = DataFrame(
5454
np.random.randn(N, 5),
@@ -108,7 +108,7 @@ class ToJSON(BaseIO):
108108
def setup(self, orient, frame):
109109
N = 10**5
110110
ncols = 5
111-
index = date_range("20000101", periods=N, freq="H")
111+
index = date_range("20000101", periods=N, freq="h")
112112
timedeltas = timedelta_range(start=1, periods=N, freq="s")
113113
datetimes = date_range(start=1, periods=N, freq="s")
114114
ints = np.random.randint(100000000, size=N)
@@ -191,7 +191,7 @@ class ToJSONISO(BaseIO):
191191

192192
def setup(self, orient):
193193
N = 10**5
194-
index = date_range("20000101", periods=N, freq="H")
194+
index = date_range("20000101", periods=N, freq="h")
195195
timedeltas = timedelta_range(start=1, periods=N, freq="s")
196196
datetimes = date_range(start=1, periods=N, freq="s")
197197
self.df = DataFrame(
@@ -214,7 +214,7 @@ class ToJSONLines(BaseIO):
214214
def setup(self):
215215
N = 10**5
216216
ncols = 5
217-
index = date_range("20000101", periods=N, freq="H")
217+
index = date_range("20000101", periods=N, freq="h")
218218
timedeltas = timedelta_range(start=1, periods=N, freq="s")
219219
datetimes = date_range(start=1, periods=N, freq="s")
220220
ints = np.random.randint(100000000, size=N)

0 commit comments

Comments
 (0)