
Commit 95bdf96
Merge branch 'main' into cow-warning-chained
2 parents: 1006fdd + 4514636

406 files changed: +7462, -4895 lines

.github/workflows/broken-linkcheck.yml (new file, +38)

@@ -0,0 +1,38 @@
+name: Linkcheck
+on:
+  schedule:
+    # Run monthly on the 1st day of the month
+    - cron: '0 0 1 * *'
+  pull_request:
+    paths:
+      - ".github/workflows/broken-linkcheck.yml"
+      - "doc/make.py"
+jobs:
+  linkcheck:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash -el {0}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Conda
+        uses: ./.github/actions/setup-conda
+
+      - name: Build Pandas
+        uses: ./.github/actions/build_pandas
+
+      - name: Run linkcheck script
+        working-directory: ./doc
+        run: |
+          set -o pipefail
+          python make.py linkcheck | tee linkcheck.txt
+
+      - name: Display broken links
+        if: failure()
+        working-directory: ./doc
+        run: grep broken linkcheck.txt
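The last step of the workflow above only greps the saved report for lines mentioning broken links. As a rough local equivalent (a sketch only, not part of the commit; the helper name and the doc/linkcheck.txt report path are assumptions), the same filtering can be done in Python:

# Hypothetical helper mirroring the "Display broken links" step above;
# assumes `python make.py linkcheck | tee linkcheck.txt` was run in ./doc.
from pathlib import Path

def show_broken_links(report: Path = Path("doc/linkcheck.txt")) -> list[str]:
    # Keep only report lines that mention broken links, as `grep broken` does.
    broken = [line for line in report.read_text().splitlines() if "broken" in line]
    for line in broken:
        print(line)
    return broken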

.github/workflows/unit-tests.yml (+3, -3)

@@ -23,7 +23,7 @@ defaults:
 jobs:
   ubuntu:
     runs-on: ubuntu-22.04
-    timeout-minutes: 180
+    timeout-minutes: 90
     strategy:
       matrix:
         env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml]
@@ -177,7 +177,7 @@ jobs:
     if: ${{ matrix.pattern == '' && (always() && steps.build.outcome == 'success')}}

   macos-windows:
-    timeout-minutes: 180
+    timeout-minutes: 90
     strategy:
       matrix:
         os: [macos-latest, windows-latest]
@@ -322,7 +322,7 @@ jobs:
       matrix:
         os: [ubuntu-22.04, macOS-latest, windows-latest]

-    timeout-minutes: 180
+    timeout-minutes: 90

     concurrency:
       #https://github.community/t/concurrecy-not-work-for-push/183068/7

asv_bench/asv.conf.json (+1, -1)

@@ -41,7 +41,7 @@
     // pip (with all the conda available packages installed first,
     // followed by the pip installed packages).
     "matrix": {
-        "Cython": ["0.29.33"],
+        "Cython": ["3.0.5"],
         "matplotlib": [],
         "sqlalchemy": [],
         "scipy": [],

asv_bench/benchmarks/algorithms.py (+47, -53)

@@ -1,7 +1,6 @@
 from importlib import import_module

 import numpy as np
-import pyarrow as pa

 import pandas as pd

@@ -20,9 +19,9 @@ class Factorize:
         [True, False],
         [True, False],
         [
-            "int",
-            "uint",
-            "float",
+            "int64",
+            "uint64",
+            "float64",
             "object",
             "object_str",
             "datetime64[ns]",
@@ -36,28 +35,24 @@ class Factorize:

     def setup(self, unique, sort, dtype):
         N = 10**5
-        string_index = tm.makeStringIndex(N)
-        string_arrow = None
-        if dtype == "string[pyarrow]":
-            try:
-                string_arrow = pd.array(string_index, dtype="string[pyarrow]")
-            except ImportError:
-                raise NotImplementedError
-
-        data = {
-            "int": pd.Index(np.arange(N), dtype="int64"),
-            "uint": pd.Index(np.arange(N), dtype="uint64"),
-            "float": pd.Index(np.random.randn(N), dtype="float64"),
-            "object_str": string_index,
-            "object": pd.Index(np.arange(N), dtype="object"),
-            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
-            "datetime64[ns, tz]": pd.date_range(
-                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
-            ),
-            "Int64": pd.array(np.arange(N), dtype="Int64"),
-            "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
-            "string[pyarrow]": string_arrow,
-        }[dtype]
+
+        if dtype in ["int64", "uint64", "Int64", "object"]:
+            data = pd.Index(np.arange(N), dtype=dtype)
+        elif dtype == "float64":
+            data = pd.Index(np.random.randn(N), dtype=dtype)
+        elif dtype == "boolean":
+            data = pd.array(np.random.randint(0, 2, N), dtype=dtype)
+        elif dtype == "datetime64[ns]":
+            data = pd.date_range("2011-01-01", freq="h", periods=N)
+        elif dtype == "datetime64[ns, tz]":
+            data = pd.date_range("2011-01-01", freq="h", periods=N, tz="Asia/Tokyo")
+        elif dtype == "object_str":
+            data = tm.makeStringIndex(N)
+        elif dtype == "string[pyarrow]":
+            data = pd.array(tm.makeStringIndex(N), dtype="string[pyarrow]")
+        else:
+            raise NotImplementedError
+
         if not unique:
             data = data.repeat(5)
         self.data = data
@@ -74,9 +69,9 @@ class Duplicated:
         [True, False],
         ["first", "last", False],
         [
-            "int",
-            "uint",
-            "float",
+            "int64",
+            "uint64",
+            "float64",
             "string",
             "datetime64[ns]",
             "datetime64[ns, tz]",
@@ -88,22 +83,20 @@ class Duplicated:

     def setup(self, unique, keep, dtype):
         N = 10**5
-        data = {
-            "int": pd.Index(np.arange(N), dtype="int64"),
-            "uint": pd.Index(np.arange(N), dtype="uint64"),
-            "float": pd.Index(np.random.randn(N), dtype="float64"),
-            "string": tm.makeStringIndex(N),
-            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
-            "datetime64[ns, tz]": pd.date_range(
-                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
-            ),
-            "timestamp[ms][pyarrow]": pd.Index(
-                np.arange(N), dtype=pd.ArrowDtype(pa.timestamp("ms"))
-            ),
-            "duration[s][pyarrow]": pd.Index(
-                np.arange(N), dtype=pd.ArrowDtype(pa.duration("s"))
-            ),
-        }[dtype]
+        if dtype in ["int64", "uint64"]:
+            data = pd.Index(np.arange(N), dtype=dtype)
+        elif dtype == "float64":
+            data = pd.Index(np.random.randn(N), dtype="float64")
+        elif dtype == "string":
+            data = tm.makeStringIndex(N)
+        elif dtype == "datetime64[ns]":
+            data = pd.date_range("2011-01-01", freq="h", periods=N)
+        elif dtype == "datetime64[ns, tz]":
+            data = pd.date_range("2011-01-01", freq="h", periods=N, tz="Asia/Tokyo")
+        elif dtype in ["timestamp[ms][pyarrow]", "duration[s][pyarrow]"]:
+            data = pd.Index(np.arange(N), dtype=dtype)
+        else:
+            raise NotImplementedError
         if not unique:
             data = data.repeat(5)
         self.idx = data
@@ -181,21 +174,22 @@ class Quantile:
     params = [
         [0, 0.5, 1],
         ["linear", "nearest", "lower", "higher", "midpoint"],
-        ["float", "int", "uint"],
+        ["float64", "int64", "uint64"],
     ]
     param_names = ["quantile", "interpolation", "dtype"]

     def setup(self, quantile, interpolation, dtype):
         N = 10**5
-        data = {
-            "int": np.arange(N),
-            "uint": np.arange(N).astype(np.uint64),
-            "float": np.random.randn(N),
-        }
-        self.idx = pd.Series(data[dtype].repeat(5))
+        if dtype in ["int64", "uint64"]:
+            data = np.arange(N, dtype=dtype)
+        elif dtype == "float64":
+            data = np.random.randn(N)
+        else:
+            raise NotImplementedError
+        self.ser = pd.Series(data.repeat(5))

     def time_quantile(self, quantile, interpolation, dtype):
-        self.idx.quantile(quantile, interpolation=interpolation)
+        self.ser.quantile(quantile, interpolation=interpolation)


 class SortIntegerArray:
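The algorithms.py refactor above replaces dicts that materialised data for every dtype on each setup call with if/elif chains that build only the requested dtype (and drops the module-level pyarrow import). A minimal sketch of the same dispatch pattern; `make_data` is a hypothetical stand-in for the benchmark setup, not code from the commit:

import numpy as np
import pandas as pd

def make_data(dtype: str, N: int = 10**5):
    # Build only the container the current benchmark parameter asks for,
    # instead of constructing every dtype and indexing into a dict.
    if dtype in ("int64", "uint64", "Int64", "object"):
        return pd.Index(np.arange(N), dtype=dtype)
    if dtype == "float64":
        return pd.Index(np.random.randn(N), dtype=dtype)
    if dtype == "datetime64[ns]":
        return pd.date_range("2011-01-01", freq="h", periods=N)
    raise NotImplementedError(dtype)

idx = make_data("uint64")  # one of the new explicit dtype params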

asv_bench/benchmarks/arithmetic.py (+1, -1)

@@ -491,7 +491,7 @@ class BinaryOpsMultiIndex:
     param_names = ["func"]

     def setup(self, func):
-        array = date_range("20200101 00:00", "20200102 0:00", freq="S")
+        array = date_range("20200101 00:00", "20200102 0:00", freq="s")
        level_0_names = [str(i) for i in range(30)]

        index = pd.MultiIndex.from_product([level_0_names, array])
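This one-line change, and the similar ones in the benchmark files below, swaps uppercase frequency aliases such as "S" and "H" for their lowercase forms "s" and "h". A quick illustration, assuming a pandas version where the lowercase aliases are the preferred spelling (2.2+ deprecates the uppercase ones):

import pandas as pd

# One timestamp per second; "s" is the lowercase alias for seconds.
per_second = pd.date_range("20200101 00:00", "20200102 00:00", freq="s")
print(len(per_second))  # 86401 timestamps, both endpoints included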

asv_bench/benchmarks/array.py (+3, -3)

@@ -31,9 +31,9 @@ def time_from_float_array(self):
 class IntegerArray:
     def setup(self):
         N = 250_000
-        self.values_integer = np.array([1, 0, 1, 0] * N)
-        self.data = np.array([1, 2, 3, 4] * N, dtype="int64")
-        self.mask = np.array([False, False, True, False] * N)
+        self.values_integer = np.tile(np.array([1, 0, 1, 0]), N)
+        self.data = np.tile(np.array([1, 2, 3, 4], dtype="int64"), N)
+        self.mask = np.tile(np.array([False, False, True, False]), N)

     def time_constructor(self):
         pd.arrays.IntegerArray(self.data, self.mask)
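The setup above switches from Python list multiplication (which builds a million-element Python list that NumPy then converts) to np.tile, which repeats the small array directly in NumPy. The two forms produce identical arrays; a quick check, illustrative only:

import numpy as np

N = 250_000
via_list = np.array([1, 2, 3, 4] * N, dtype="int64")          # old style
via_tile = np.tile(np.array([1, 2, 3, 4], dtype="int64"), N)  # new style
assert np.array_equal(via_list, via_tile)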

asv_bench/benchmarks/categoricals.py (+4, -6)

@@ -260,18 +260,16 @@ class CategoricalSlicing:
     def setup(self, index):
         N = 10**6
         categories = ["a", "b", "c"]
-        values = [0] * N + [1] * N + [2] * N
         if index == "monotonic_incr":
-            self.data = pd.Categorical.from_codes(values, categories=categories)
+            codes = np.repeat([0, 1, 2], N)
         elif index == "monotonic_decr":
-            self.data = pd.Categorical.from_codes(
-                list(reversed(values)), categories=categories
-            )
+            codes = np.repeat([2, 1, 0], N)
         elif index == "non_monotonic":
-            self.data = pd.Categorical.from_codes([0, 1, 2] * N, categories=categories)
+            codes = np.tile([0, 1, 2], N)
         else:
             raise ValueError(f"Invalid index param: {index}")

+        self.data = pd.Categorical.from_codes(codes, categories=categories)
         self.scalar = 10000
         self.list = list(range(10000))
         self.cat_scalar = "b"
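As in the change above, np.repeat and np.tile reproduce the codes that the old list arithmetic built, without first constructing a 3-million-element Python list. For a small N, illustrative only:

import numpy as np

N = 4  # the benchmark uses 10**6
assert np.array_equal(np.repeat([0, 1, 2], N), np.array([0] * N + [1] * N + [2] * N))
assert np.array_equal(np.tile([0, 1, 2], N), np.array([0, 1, 2] * N))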

asv_bench/benchmarks/frame_methods.py (+5, -4)

@@ -439,9 +439,9 @@ def setup(self, inplace, dtype):
         N, M = 10000, 100
         if dtype in ("datetime64[ns]", "datetime64[ns, tz]", "timedelta64[ns]"):
             data = {
-                "datetime64[ns]": date_range("2011-01-01", freq="H", periods=N),
+                "datetime64[ns]": date_range("2011-01-01", freq="h", periods=N),
                 "datetime64[ns, tz]": date_range(
-                    "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+                    "2011-01-01", freq="h", periods=N, tz="Asia/Tokyo"
                 ),
                 "timedelta64[ns]": timedelta_range(start="1 day", periods=N, freq="1D"),
             }
@@ -640,7 +640,8 @@ def time_frame_nunique(self):

 class SeriesNuniqueWithNan:
     def setup(self):
-        self.ser = Series(100000 * (100 * [np.nan] + list(range(100)))).astype(float)
+        values = 100 * [np.nan] + list(range(100))
+        self.ser = Series(np.tile(values, 10000), dtype=float)

     def time_series_nunique_nan(self):
         self.ser.nunique()
@@ -649,7 +650,7 @@ def time_series_nunique_nan(self):
 class Duplicated:
     def setup(self):
         n = 1 << 20
-        t = date_range("2015-01-01", freq="S", periods=(n // 64))
+        t = date_range("2015-01-01", freq="s", periods=(n // 64))
         xs = np.random.randn(n // 64).round(2)
         self.df = DataFrame(
             {

asv_bench/benchmarks/gil.py (+11, -9)

@@ -212,7 +212,7 @@ def run(dti):
     def time_datetime_to_period(self):
         @test_parallel(num_threads=2)
         def run(dti):
-            dti.to_period("S")
+            dti.to_period("s")

         run(self.dti)

@@ -272,18 +272,20 @@ class ParallelReadCSV(BaseIO):
     def setup(self, dtype):
         rows = 10000
         cols = 50
-        data = {
-            "float": DataFrame(np.random.randn(rows, cols)),
-            "datetime": DataFrame(
+        if dtype == "float":
+            df = DataFrame(np.random.randn(rows, cols))
+        elif dtype == "datetime":
+            df = DataFrame(
                 np.random.randn(rows, cols), index=date_range("1/1/2000", periods=rows)
-            ),
-            "object": DataFrame(
+            )
+        elif dtype == "object":
+            df = DataFrame(
                 "foo", index=range(rows), columns=["object%03d" for _ in range(5)]
-            ),
-        }
+            )
+        else:
+            raise NotImplementedError

         self.fname = f"__test_{dtype}__.csv"
-        df = data[dtype]
         df.to_csv(self.fname)

         @test_parallel(num_threads=2)
