Skip to content

Commit b0d0d6a

Browse files
authored
Merge branch 'main' into include-pyproject
2 parents 616e095 + 9e8a243 commit b0d0d6a

File tree

394 files changed

+8624
-4373
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

394 files changed

+8624
-4373
lines changed

.github/ISSUE_TEMPLATE/bug_report.yaml

+3-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ body:
1717
[latest version](https://pandas.pydata.org/docs/whatsnew/index.html) of pandas.
1818
required: true
1919
- label: >
20-
I have confirmed this bug exists on the main branch of pandas.
20+
I have confirmed this bug exists on the [main branch]
21+
(https://pandas.pydata.org/docs/dev/getting_started/install.html#installing-the-development-version-of-pandas)
22+
of pandas.
2123
- type: textarea
2224
id: example
2325
attributes:

.github/workflows/32-bit-linux.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ jobs:
3939
. ~/virtualenvs/pandas-dev/bin/activate && \
4040
python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
4141
python -m pip install versioneer[toml] && \
42-
python -m pip install cython numpy python-dateutil pytz pytest pytest-xdist pytest-asyncio>=0.17 hypothesis && \
42+
python -m pip install cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.34.2 && \
4343
python setup.py build_ext -q -j1 && \
4444
python -m pip install --no-build-isolation --no-use-pep517 -e . && \
4545
python -m pip list && \

.github/workflows/codeql.yml

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ concurrency:
88
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
99
cancel-in-progress: true
1010

11+
permissions:
12+
contents: read
13+
1114
jobs:
1215
analyze:
1316
runs-on: ubuntu-22.04

.github/workflows/python-dev.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,10 @@ jobs:
7373
run: |
7474
python --version
7575
python -m pip install --upgrade pip setuptools wheel
76-
python -m pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
76+
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
7777
python -m pip install git+https://github.com/nedbat/coveragepy.git
7878
python -m pip install versioneer[toml]
79-
python -m pip install python-dateutil pytz cython hypothesis==6.52.1 pytest>=6.2.5 pytest-xdist pytest-cov pytest-asyncio>=0.17
79+
python -m pip install python-dateutil pytz cython hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
8080
python -m pip list
8181
8282
# GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs

.github/workflows/wheels.yml

+4-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ concurrency:
3030
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
3131
cancel-in-progress: true
3232

33+
permissions:
34+
contents: read
35+
3336
jobs:
3437
build_wheels:
3538
name: Build wheel for ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
@@ -165,7 +168,7 @@ jobs:
165168
# (1. Generate sdist 2. Build wheels from sdist)
166169
# This tests the sdists, and saves some build time
167170
python -m pip install dist/*.gz
168-
pip install hypothesis==6.52.1 pytest>=6.2.5 pytest-xdist pytest-asyncio>=0.17
171+
pip install hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
169172
cd .. # Not a good idea to test within the src tree
170173
python -c "import pandas; print(pandas.__version__);
171174
pandas.test(extra_args=['-m not clipboard and not single_cpu', '--skip-slow', '--skip-network', '--skip-db', '-n=2', '--no-strict-data-files']);

.pre-commit-config.yaml

+134-30
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ default_stages: [
1515
ci:
1616
autofix_prs: false
1717
repos:
18+
- repo: https://github.com/charliermarsh/ruff-pre-commit
19+
rev: v0.0.215
20+
hooks:
21+
- id: ruff
1822
- repo: https://github.com/MarcoGorelli/absolufy-imports
1923
rev: v0.3.1
2024
hooks:
@@ -34,7 +38,7 @@ repos:
3438
types_or: [python, rst, markdown]
3539
additional_dependencies: [tomli]
3640
- repo: https://github.com/MarcoGorelli/cython-lint
37-
rev: v0.9.1
41+
rev: v0.10.1
3842
hooks:
3943
- id: cython-lint
4044
- id: double-quote-cython-strings
@@ -63,25 +67,16 @@ repos:
6367
'--extensions=c,h',
6468
'--headers=h',
6569
--recursive,
66-
'--filter=-readability/casting,-runtime/int,-build/include_subdir'
70+
--linelength=88,
71+
'--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size'
6772
]
68-
- repo: https://github.com/PyCQA/flake8
69-
rev: 6.0.0
70-
hooks:
71-
- id: flake8
72-
# Need to patch os.remove rule in pandas-dev-flaker
73-
exclude: ^ci/fix_wheels.py
74-
additional_dependencies: &flake8_dependencies
75-
- flake8==6.0.0
76-
- flake8-bugbear==22.7.1
77-
- pandas-dev-flaker==0.5.0
7873
- repo: https://github.com/pycqa/pylint
79-
rev: v2.15.6
74+
rev: v2.15.9
8075
hooks:
8176
- id: pylint
8277
stages: [manual]
8378
- repo: https://github.com/pycqa/pylint
84-
rev: v2.15.6
79+
rev: v2.15.9
8580
hooks:
8681
- id: pylint
8782
alias: redefined-outer-name
@@ -94,15 +89,14 @@ repos:
9489
|^pandas/util/_test_decorators\.py # keep excluded
9590
|^pandas/_version\.py # keep excluded
9691
|^pandas/conftest\.py # keep excluded
97-
|^pandas/core/generic\.py
9892
args: [--disable=all, --enable=redefined-outer-name]
9993
stages: [manual]
10094
- repo: https://github.com/PyCQA/isort
101-
rev: 5.10.1
95+
rev: 5.11.4
10296
hooks:
10397
- id: isort
10498
- repo: https://github.com/asottile/pyupgrade
105-
rev: v3.2.2
99+
rev: v3.3.1
106100
hooks:
107101
- id: pyupgrade
108102
args: [--py38-plus]
@@ -120,12 +114,6 @@ repos:
120114
rev: v0.6.7
121115
hooks:
122116
- id: sphinx-lint
123-
- repo: https://github.com/asottile/yesqa
124-
rev: v1.4.0
125-
hooks:
126-
- id: yesqa
127-
additional_dependencies: *flake8_dependencies
128-
stages: [manual]
129117
- repo: local
130118
hooks:
131119
# NOTE: we make `black` a local hook because if it's installed from
@@ -183,6 +171,21 @@ repos:
183171
types: [rst]
184172
args: [--filename=*.rst]
185173
additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
174+
- id: inconsistent-namespace-usage
175+
name: 'Check for inconsistent use of pandas namespace'
176+
entry: python scripts/check_for_inconsistent_pandas_namespace.py
177+
exclude: ^pandas/core/interchange/
178+
language: python
179+
types: [python]
180+
- id: no-os-remove
181+
name: Check code for instances of os.remove
182+
entry: os\.remove
183+
language: pygrep
184+
types: [python]
185+
files: ^pandas/tests/
186+
exclude: |
187+
(?x)^
188+
pandas/tests/io/pytables/test_store\.py$
186189
- id: unwanted-patterns
187190
name: Unwanted patterns
188191
language: pygrep
@@ -192,6 +195,20 @@ repos:
192195
\#\ type:\ (?!ignore)
193196
|\#\ type:\s?ignore(?!\[)
194197
198+
# foo.__class__ instead of type(foo)
199+
|\.__class__
200+
201+
# np.bool/np.object instead of np.bool_/np.object_
202+
|np\.bool[^_8`]
203+
|np\.object[^_8`]
204+
205+
# imports from collections.abc instead of `from collections import abc`
206+
|from\ collections\.abc\ import
207+
208+
# NumPy: direct imports of numpy.random or its members
209+
|from\ numpy\ import\ random
210+
|from\ numpy\.random\ import
211+
195212
# Incorrect code-block / IPython directives
196213
|\.\.\ code-block\ ::
197214
|\.\.\ ipython\ ::
@@ -200,7 +217,17 @@ repos:
200217
201218
# Check for deprecated messages without sphinx directive
202219
|(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)
220+
221+
# {foo!r} instead of {repr(foo)}
222+
|!r}
223+
224+
# builtin filter function
225+
|(?<!def)[\(\s]filter\(
226+
227+
# builtin exec function
228+
|[^a-zA-Z0-9_]exec\(
203229
types_or: [python, cython, rst]
230+
exclude: ^doc/source/development/code_style\.rst # contains examples of patterns to avoid
204231
- id: cython-casting
205232
name: Check Cython casting is `<type>obj`, not `<type> obj`
206233
language: pygrep
@@ -231,26 +258,103 @@ repos:
231258
files: ^pandas/tests/extension/base
232259
types: [python]
233260
exclude: ^pandas/tests/extension/base/base\.py
261+
- id: unwanted-patterns-in-tests
262+
name: Unwanted patterns in tests
263+
language: pygrep
264+
entry: |
265+
(?x)
266+
# pytest.xfail instead of pytest.mark.xfail
267+
pytest\.xfail
268+
269+
# imports from pandas._testing instead of `import pandas._testing as tm`
270+
|from\ pandas\._testing\ import
271+
|from\ pandas\ import\ _testing\ as\ tm
272+
273+
# No direct imports from conftest
274+
|conftest\ import
275+
|import\ conftest
276+
277+
# pandas.testing instead of tm
278+
|pd\.testing\.
279+
280+
# pd.api.types instead of from pandas.api.types import ...
281+
|(pd|pandas)\.api\.types\.
282+
283+
# np.testing, np.array_equal
284+
|(numpy|np)(\.testing|\.array_equal)
285+
286+
# unittest.mock (use pytest builtin monkeypatch fixture instead)
287+
|(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)
288+
289+
# pytest raises without context
290+
|\s\ pytest.raises
291+
292+
# pytest.warns (use tm.assert_produces_warning instead)
293+
|pytest\.warns
294+
files: ^pandas/tests/
295+
types_or: [python, cython, rst]
296+
- id: unwanted-patterns-in-ea-tests
297+
name: Unwanted patterns in EA tests
298+
language: pygrep
299+
entry: |
300+
(?x)
301+
tm.assert_(series|frame)_equal
302+
files: ^pandas/tests/extension/base/
303+
exclude: ^pandas/tests/extension/base/base\.py$
304+
types_or: [python, cython, rst]
305+
- id: unwanted-patterns-in-cython
306+
name: Unwanted patterns in Cython code
307+
language: pygrep
308+
entry: |
309+
(?x)
310+
# `<type>obj` as opposed to `<type> obj`
311+
[a-zA-Z0-9*]>[ ]
312+
types: [cython]
234313
- id: pip-to-conda
235314
name: Generate pip dependency from conda
236315
language: python
237316
entry: python scripts/generate_pip_deps_from_conda.py
238317
files: ^(environment.yml|requirements-dev.txt)$
239318
pass_filenames: false
240319
additional_dependencies: [pyyaml, toml]
241-
- id: sync-flake8-versions
242-
name: Check flake8 version is synced across flake8, yesqa, and environment.yml
243-
language: python
244-
entry: python scripts/sync_flake8_versions.py
245-
files: ^(\.pre-commit-config\.yaml|environment\.yml)$
246-
pass_filenames: false
247-
additional_dependencies: [pyyaml, toml]
248320
- id: title-capitalization
249321
name: Validate correct capitalization among titles in documentation
250322
entry: python scripts/validate_rst_title_capitalization.py
251323
language: python
252324
types: [rst]
253325
files: ^doc/source/(development|reference)/
326+
- id: unwanted-patterns-bare-pytest-raises
327+
name: Check for use of bare pytest raises
328+
language: python
329+
entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises"
330+
types: [python]
331+
files: ^pandas/tests/
332+
exclude: ^pandas/tests/extension/
333+
- id: unwanted-patterns-private-function-across-module
334+
name: Check for use of private functions across modules
335+
language: python
336+
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
337+
types: [python]
338+
exclude: ^(asv_bench|pandas/tests|doc)/
339+
- id: unwanted-patterns-private-import-across-module
340+
name: Check for import of private attributes across modules
341+
language: python
342+
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
343+
types: [python]
344+
exclude: |
345+
(?x)
346+
^(asv_bench|pandas/tests|doc)/
347+
|scripts/validate_min_versions_in_sync\.py$
348+
- id: unwanted-patterns-strings-to-concatenate
349+
name: Check for use of not concatenated strings
350+
language: python
351+
entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
352+
types_or: [python, cython]
353+
- id: unwanted-patterns-strings-with-misplaced-whitespace
354+
name: Check for strings with misplaced spaces
355+
language: python
356+
entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
357+
types_or: [python, cython]
254358
- id: use-pd_array-in-core
255359
name: Import pandas.array as pd_array in core
256360
language: python

asv_bench/asv.conf.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
// pip (with all the conda available packages installed first,
4242
// followed by the pip installed packages).
4343
"matrix": {
44-
"numpy": ["1.23.5"], // https://github.com/pandas-dev/pandas/pull/50356
44+
"numpy": [],
4545
"Cython": ["0.29.32"],
4646
"matplotlib": [],
4747
"sqlalchemy": [],

asv_bench/benchmarks/array.py

+9
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ def time_from_integer_array(self):
4444
pd.array(self.values_integer, dtype="Int64")
4545

4646

47+
class IntervalArray:
48+
def setup(self):
49+
N = 10_000
50+
self.tuples = [(i, i + 1) for i in range(N)]
51+
52+
def time_from_tuples(self):
53+
pd.arrays.IntervalArray.from_tuples(self.tuples)
54+
55+
4756
class StringArray:
4857
def setup(self):
4958
N = 100_000

asv_bench/benchmarks/indexing.py

+13
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,19 @@ def time_assign_list_of_columns_concat(self):
476476
concat([self.df, df], axis=1)
477477

478478

479+
class Setitem:
480+
def setup(self):
481+
N = 500_000
482+
cols = 500
483+
self.df = DataFrame(np.random.rand(N, cols))
484+
485+
def time_setitem(self):
486+
self.df[100] = 100
487+
488+
def time_setitem_list(self):
489+
self.df[[100, 200, 300]] = 100
490+
491+
479492
class ChainIndexing:
480493

481494
params = [None, "warn"]

asv_bench/benchmarks/pandas_vb_common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class BaseIO:
7070
def remove(self, f):
7171
"""Remove created files"""
7272
try:
73-
os.remove(f) # noqa: PDF008
73+
os.remove(f)
7474
except OSError:
7575
# On Windows, attempting to remove a file that is in use
7676
# causes an exception to be raised

asv_bench/benchmarks/rolling.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ class Groupby:
292292
["sum", "median", "mean", "max", "min", "kurt", "sum"],
293293
[
294294
("rolling", {"window": 2}),
295-
("rolling", {"window": "30s", "on": "C"}),
295+
("rolling", {"window": "30s"}),
296296
("expanding", {}),
297297
],
298298
)
@@ -304,9 +304,10 @@ def setup(self, method, window_kwargs):
304304
{
305305
"A": [str(i) for i in range(N)] * 10,
306306
"B": list(range(N)) * 10,
307-
"C": pd.date_range(start="1900-01-01", freq="1min", periods=N * 10),
308307
}
309308
)
309+
if isinstance(kwargs.get("window", None), str):
310+
df.index = pd.date_range(start="1900-01-01", freq="1min", periods=N * 10)
310311
self.groupby_window = getattr(df.groupby("A"), window)(**kwargs)
311312

312313
def time_method(self, method, window_kwargs):

0 commit comments

Comments
 (0)