Skip to content

Commit 192653c

Browse files
authored
Merge branch 'main' into issue-31243
2 parents 050ec0a + 3bf2cb1 commit 192653c

File tree

238 files changed

+4134
-2192
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

238 files changed

+4134
-2192
lines changed

.github/workflows/code-checks.yml

+10-11
Original file line numberDiff line numberDiff line change
@@ -67,15 +67,6 @@ jobs:
6767
environment-file: ${{ env.ENV_FILE }}
6868
use-only-tar-bz2: true
6969

70-
- name: Install node.js (for pyright)
71-
uses: actions/setup-node@v3
72-
with:
73-
node-version: "16"
74-
75-
- name: Install pyright
76-
# note: keep version in sync with .pre-commit-config.yaml
77-
run: npm install -g [email protected]
78-
7970
- name: Build Pandas
8071
id: build
8172
uses: ./.github/actions/build_pandas
@@ -96,8 +87,16 @@ jobs:
9687
run: ci/code_checks.sh docstrings
9788
if: ${{ steps.build.outcome == 'success' }}
9889

99-
- name: Run typing validation
100-
run: ci/code_checks.sh typing
90+
- name: Use existing environment for type checking
91+
run: |
92+
echo $PATH >> $GITHUB_PATH
93+
echo "PYTHONHOME=$PYTHONHOME" >> $GITHUB_ENV
94+
echo "PYTHONPATH=$PYTHONPATH" >> $GITHUB_ENV
95+
96+
- name: Typing
97+
uses: pre-commit/action@v2.0.3
98+
with:
99+
extra_args: --hook-stage manual --all-files
101100
if: ${{ steps.build.outcome == 'success' }}
102101

103102
- name: Run docstring validation script tests

.github/workflows/python-dev.yml

+1-2
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,10 @@ jobs:
5454
with:
5555
python-version: '3.11-dev'
5656

57-
# TODO: GH#44980 https://github.com/pypa/setuptools/issues/2941
5857
- name: Install dependencies
5958
shell: bash -el {0}
6059
run: |
61-
python -m pip install --upgrade pip "setuptools<60.0.0" wheel
60+
python -m pip install --upgrade pip setuptools wheel
6261
pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
6362
pip install git+https://github.com/nedbat/coveragepy.git
6463
pip install cython python-dateutil pytz hypothesis pytest>=6.2.5 pytest-xdist pytest-cov

.github/workflows/sdist.yml

+1-4
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,9 @@ jobs:
4141
with:
4242
python-version: ${{ matrix.python-version }}
4343

44-
# TODO: GH#44980 https://github.com/pypa/setuptools/issues/2941
4544
- name: Install dependencies
4645
run: |
47-
python -m pip install --upgrade pip "setuptools<60.0.0" wheel
46+
python -m pip install --upgrade pip setuptools wheel
4847
4948
# GH 39416
5049
pip install numpy
@@ -66,10 +65,8 @@ jobs:
6665
channels: conda-forge
6766
python-version: '${{ matrix.python-version }}'
6867

69-
# TODO: GH#44980 https://github.com/pypa/setuptools/issues/2941
7068
- name: Install pandas from sdist
7169
run: |
72-
python -m pip install --upgrade "setuptools<60.0.0"
7370
pip list
7471
python -m pip install dist/*.gz
7572

.pre-commit-config.yaml

+17-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
minimum_pre_commit_version: 2.9.2
1+
minimum_pre_commit_version: 2.15.0
22
exclude: ^LICENSES/|\.(html|csv|svg)$
3+
# reserve "manual" for mypy and pyright
4+
default_stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
35
ci:
46
autofix_prs: false
57
repos:
@@ -31,7 +33,9 @@ repos:
3133
- id: debug-statements
3234
- id: end-of-file-fixer
3335
exclude: \.txt$
36+
stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
3437
- id: trailing-whitespace
38+
stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite]
3539
- repo: https://github.com/cpplint/cpplint
3640
rev: 1.6.0
3741
hooks:
@@ -84,12 +88,22 @@ repos:
8488
- id: pyright
8589
name: pyright
8690
entry: pyright
91+
# note: assumes python env is setup and activated
8792
language: node
8893
pass_filenames: false
8994
types: [python]
9095
stages: [manual]
91-
# note: keep version in sync with .github/workflows/code-checks.yml
92-
additional_dependencies: ['[email protected]']
96+
additional_dependencies: ['[email protected]']
97+
- repo: local
98+
hooks:
99+
- id: mypy
100+
name: mypy
101+
entry: mypy
102+
# note: assumes python env is setup and activated
103+
language: system
104+
pass_filenames: false
105+
types: [python]
106+
stages: [manual]
93107
- repo: local
94108
hooks:
95109
- id: flake8-rst

asv_bench/benchmarks/arithmetic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ def time_add_overflow_both_arg_nan(self):
420420

421421

422422
hcal = pd.tseries.holiday.USFederalHolidayCalendar()
423-
# These offsets currently raise a NotImplimentedError with .apply_index()
423+
# These offsets currently raise a NotImplementedError with .apply_index()
424424
non_apply = [
425425
pd.offsets.Day(),
426426
pd.offsets.BYearEnd(),

asv_bench/benchmarks/gil.py

+56-26
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from functools import wraps
2+
import threading
3+
14
import numpy as np
25

36
from pandas import (
@@ -30,21 +33,57 @@
3033
from pandas._libs import algos
3134
except ImportError:
3235
from pandas import algos
33-
try:
34-
from pandas._testing import test_parallel # noqa: PDF014
3536

36-
have_real_test_parallel = True
37-
except ImportError:
38-
have_real_test_parallel = False
3937

40-
def test_parallel(num_threads=1):
41-
def wrapper(fname):
42-
return fname
38+
from .pandas_vb_common import BaseIO # isort:skip
4339

44-
return wrapper
4540

41+
def test_parallel(num_threads=2, kwargs_list=None):
42+
"""
43+
Decorator to run the same function multiple times in parallel.
4644
47-
from .pandas_vb_common import BaseIO # isort:skip
45+
Parameters
46+
----------
47+
num_threads : int, optional
48+
The number of times the function is run in parallel.
49+
kwargs_list : list of dicts, optional
50+
The list of kwargs to update original
51+
function kwargs on different threads.
52+
53+
Notes
54+
-----
55+
This decorator does not pass the return value of the decorated function.
56+
57+
Original from scikit-image:
58+
59+
https://github.com/scikit-image/scikit-image/pull/1519
60+
61+
"""
62+
assert num_threads > 0
63+
has_kwargs_list = kwargs_list is not None
64+
if has_kwargs_list:
65+
assert len(kwargs_list) == num_threads
66+
67+
def wrapper(func):
68+
@wraps(func)
69+
def inner(*args, **kwargs):
70+
if has_kwargs_list:
71+
update_kwargs = lambda i: dict(kwargs, **kwargs_list[i])
72+
else:
73+
update_kwargs = lambda i: kwargs
74+
threads = []
75+
for i in range(num_threads):
76+
updated_kwargs = update_kwargs(i)
77+
thread = threading.Thread(target=func, args=args, kwargs=updated_kwargs)
78+
threads.append(thread)
79+
for thread in threads:
80+
thread.start()
81+
for thread in threads:
82+
thread.join()
83+
84+
return inner
85+
86+
return wrapper
4887

4988

5089
class ParallelGroupbyMethods:
@@ -53,8 +92,7 @@ class ParallelGroupbyMethods:
5392
param_names = ["threads", "method"]
5493

5594
def setup(self, threads, method):
56-
if not have_real_test_parallel:
57-
raise NotImplementedError
95+
5896
N = 10**6
5997
ngroups = 10**3
6098
df = DataFrame(
@@ -86,8 +124,7 @@ class ParallelGroups:
86124
param_names = ["threads"]
87125

88126
def setup(self, threads):
89-
if not have_real_test_parallel:
90-
raise NotImplementedError
127+
91128
size = 2**22
92129
ngroups = 10**3
93130
data = Series(np.random.randint(0, ngroups, size=size))
@@ -108,8 +145,7 @@ class ParallelTake1D:
108145
param_names = ["dtype"]
109146

110147
def setup(self, dtype):
111-
if not have_real_test_parallel:
112-
raise NotImplementedError
148+
113149
N = 10**6
114150
df = DataFrame({"col": np.arange(N, dtype=dtype)})
115151
indexer = np.arange(100, len(df) - 100)
@@ -131,8 +167,7 @@ class ParallelKth:
131167
repeat = 5
132168

133169
def setup(self):
134-
if not have_real_test_parallel:
135-
raise NotImplementedError
170+
136171
N = 10**7
137172
k = 5 * 10**5
138173
kwargs_list = [{"arr": np.random.randn(N)}, {"arr": np.random.randn(N)}]
@@ -149,8 +184,7 @@ def time_kth_smallest(self):
149184

150185
class ParallelDatetimeFields:
151186
def setup(self):
152-
if not have_real_test_parallel:
153-
raise NotImplementedError
187+
154188
N = 10**6
155189
self.dti = date_range("1900-01-01", periods=N, freq="T")
156190
self.period = self.dti.to_period("D")
@@ -204,8 +238,7 @@ class ParallelRolling:
204238
param_names = ["method"]
205239

206240
def setup(self, method):
207-
if not have_real_test_parallel:
208-
raise NotImplementedError
241+
209242
win = 100
210243
arr = np.random.rand(100000)
211244
if hasattr(DataFrame, "rolling"):
@@ -248,8 +281,7 @@ class ParallelReadCSV(BaseIO):
248281
param_names = ["dtype"]
249282

250283
def setup(self, dtype):
251-
if not have_real_test_parallel:
252-
raise NotImplementedError
284+
253285
rows = 10000
254286
cols = 50
255287
data = {
@@ -284,8 +316,6 @@ class ParallelFactorize:
284316
param_names = ["threads"]
285317

286318
def setup(self, threads):
287-
if not have_real_test_parallel:
288-
raise NotImplementedError
289319

290320
strings = tm.makeStringIndex(100000)
291321

asv_bench/benchmarks/reshape.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,9 @@ def setup(self, bins):
268268
self.datetime_series = pd.Series(
269269
np.random.randint(N, size=N), dtype="datetime64[ns]"
270270
)
271-
self.interval_bins = pd.IntervalIndex.from_breaks(np.linspace(0, N, bins))
271+
self.interval_bins = pd.IntervalIndex.from_breaks(
272+
np.linspace(0, N, bins), "right"
273+
)
272274

273275
def time_cut_int(self, bins):
274276
pd.cut(self.int_series, bins)

asv_bench/benchmarks/tslibs/normalize.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,10 @@ def setup(self, size, tz):
3636
raise NotImplementedError
3737

3838
def time_normalize_i8_timestamps(self, size, tz):
39-
normalize_i8_timestamps(self.i8data, tz)
39+
# 10 i.e. NPY_FR_ns
40+
normalize_i8_timestamps(self.i8data, tz, 10)
4041

4142
def time_is_date_array_normalized(self, size, tz):
4243
# TODO: cases with different levels of short-circuiting
43-
is_date_array_normalized(self.i8data, tz)
44+
# 10 i.e. NPY_FR_ns
45+
is_date_array_normalized(self.i8data, tz, 10)

asv_bench/benchmarks/tslibs/offsets.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
pass
1515

1616
hcal = pandas.tseries.holiday.USFederalHolidayCalendar()
17-
# These offsets currently raise a NotImplimentedError with .apply_index()
17+
# These offsets currently raise a NotImplementedError with .apply_index()
1818
non_apply = [
1919
offsets.Day(),
2020
offsets.BYearEnd(),

ci/code_checks.sh

+2-21
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,10 @@
1111
# $ ./ci/code_checks.sh code # checks on imported code
1212
# $ ./ci/code_checks.sh doctests # run doctests
1313
# $ ./ci/code_checks.sh docstrings # validate docstring errors
14-
# $ ./ci/code_checks.sh typing # run static type analysis
1514
# $ ./ci/code_checks.sh single-docs # check single-page docs build warning-free
1615

17-
[[ -z "$1" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "typing" || "$1" == "single-docs" ]] || \
18-
{ echo "Unknown command $1. Usage: $0 [code|doctests|docstrings|typing]"; exit 9999; }
16+
[[ -z "$1" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "single-docs" ]] || \
17+
{ echo "Unknown command $1. Usage: $0 [code|doctests|docstrings]"; exit 9999; }
1918

2019
BASE_DIR="$(dirname $0)/.."
2120
RET=0
@@ -85,24 +84,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8584

8685
fi
8786

88-
### TYPING ###
89-
if [[ -z "$CHECK" || "$CHECK" == "typing" ]]; then
90-
91-
echo "mypy --version"
92-
mypy --version
93-
94-
MSG='Performing static analysis using mypy' ; echo $MSG
95-
mypy
96-
RET=$(($RET + $?)) ; echo $MSG "DONE"
97-
98-
# run pyright, if it is installed
99-
if command -v pyright &> /dev/null ; then
100-
MSG='Performing static analysis using pyright' ; echo $MSG
101-
pyright
102-
RET=$(($RET + $?)) ; echo $MSG "DONE"
103-
fi
104-
fi
105-
10687
### SINGLE-PAGE DOCS ###
10788
if [[ -z "$CHECK" || "$CHECK" == "single-docs" ]]; then
10889
python doc/make.py --warnings-are-errors --single pandas.Series.value_counts

ci/deps/actions-310.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ dependencies:
55
- python=3.10
66

77
# test dependencies
8-
- cython=0.29.24
8+
- cython=0.29.30
99
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.31

ci/deps/actions-38-downstream_compat.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66
- python=3.8
77

88
# test dependencies
9-
- cython=0.29.24
9+
- cython=0.29.30
1010
- pytest>=6.0
1111
- pytest-cov
1212
- pytest-xdist>=1.31

ci/deps/actions-38-minimum_versions.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ dependencies:
77
- python=3.8.0
88

99
# test dependencies
10-
- cython=0.29.24
10+
- cython=0.29.30
1111
- pytest>=6.0
1212
- pytest-cov
1313
- pytest-xdist>=1.31

ci/deps/actions-38.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ dependencies:
55
- python=3.8
66

77
# test dependencies
8-
- cython=0.29.24
8+
- cython=0.29.30
99
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.31

ci/deps/actions-39.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ dependencies:
55
- python=3.9
66

77
# test dependencies
8-
- cython=0.29.24
8+
- cython=0.29.30
99
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.31

0 commit comments

Comments
 (0)