Skip to content

Commit 491616b

Browse files
committed
2 parents c989cdb + cb83712 commit 491616b

File tree

362 files changed

+7377
-3929
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

362 files changed

+7377
-3929
lines changed

.github/ISSUE_TEMPLATE/bug_report.yaml

+3-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ body:
1717
[latest version](https://pandas.pydata.org/docs/whatsnew/index.html) of pandas.
1818
required: true
1919
- label: >
20-
I have confirmed this bug exists on the main branch of pandas.
20+
I have confirmed this bug exists on the [main branch]
21+
(https://pandas.pydata.org/docs/dev/getting_started/install.html#installing-the-development-version-of-pandas)
22+
of pandas.
2123
- type: textarea
2224
id: example
2325
attributes:

.github/workflows/32-bit-linux.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ jobs:
3939
. ~/virtualenvs/pandas-dev/bin/activate && \
4040
python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
4141
python -m pip install versioneer[toml] && \
42-
python -m pip install cython numpy python-dateutil pytz pytest pytest-xdist pytest-asyncio>=0.17 hypothesis && \
42+
python -m pip install cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.34.2 && \
4343
python setup.py build_ext -q -j1 && \
4444
python -m pip install --no-build-isolation --no-use-pep517 -e . && \
4545
python -m pip list && \

.github/workflows/python-dev.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,10 @@ jobs:
7373
run: |
7474
python --version
7575
python -m pip install --upgrade pip setuptools wheel
76-
python -m pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
76+
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
7777
python -m pip install git+https://github.com/nedbat/coveragepy.git
7878
python -m pip install versioneer[toml]
79-
python -m pip install python-dateutil pytz cython hypothesis==6.52.1 pytest>=6.2.5 pytest-xdist pytest-cov pytest-asyncio>=0.17
79+
python -m pip install python-dateutil pytz cython hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
8080
python -m pip list
8181
8282
# GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs

.github/workflows/wheels.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ jobs:
168168
# (1. Generate sdist 2. Build wheels from sdist)
169169
# This tests the sdists, and saves some build time
170170
python -m pip install dist/*.gz
171-
pip install hypothesis==6.52.1 pytest>=6.2.5 pytest-xdist pytest-asyncio>=0.17
171+
pip install hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17
172172
cd .. # Not a good idea to test within the src tree
173173
python -c "import pandas; print(pandas.__version__);
174174
pandas.test(extra_args=['-m not clipboard and not single_cpu', '--skip-slow', '--skip-network', '--skip-db', '-n=2']);

.pre-commit-config.yaml

+127-23
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ default_stages: [
1515
ci:
1616
autofix_prs: false
1717
repos:
18+
- repo: https://github.com/charliermarsh/ruff-pre-commit
19+
rev: v0.0.215
20+
hooks:
21+
- id: ruff
1822
- repo: https://github.com/MarcoGorelli/absolufy-imports
1923
rev: v0.3.1
2024
hooks:
@@ -66,16 +70,6 @@ repos:
6670
--linelength=88,
6771
'--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size'
6872
]
69-
- repo: https://github.com/PyCQA/flake8
70-
rev: 6.0.0
71-
hooks:
72-
- id: flake8
73-
# Need to patch os.remove rule in pandas-dev-flaker
74-
exclude: ^ci/fix_wheels.py
75-
additional_dependencies: &flake8_dependencies
76-
- flake8==6.0.0
77-
- flake8-bugbear==22.7.1
78-
- pandas-dev-flaker==0.5.0
7973
- repo: https://github.com/pycqa/pylint
8074
rev: v2.15.9
8175
hooks:
@@ -120,12 +114,6 @@ repos:
120114
rev: v0.6.7
121115
hooks:
122116
- id: sphinx-lint
123-
- repo: https://github.com/asottile/yesqa
124-
rev: v1.4.0
125-
hooks:
126-
- id: yesqa
127-
additional_dependencies: *flake8_dependencies
128-
stages: [manual]
129117
- repo: local
130118
hooks:
131119
# NOTE: we make `black` a local hook because if it's installed from
@@ -183,6 +171,21 @@ repos:
183171
types: [rst]
184172
args: [--filename=*.rst]
185173
additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
174+
- id: inconsistent-namespace-usage
175+
name: 'Check for inconsistent use of pandas namespace'
176+
entry: python scripts/check_for_inconsistent_pandas_namespace.py
177+
exclude: ^pandas/core/interchange/
178+
language: python
179+
types: [python]
180+
- id: no-os-remove
181+
name: Check code for instances of os.remove
182+
entry: os\.remove
183+
language: pygrep
184+
types: [python]
185+
files: ^pandas/tests/
186+
exclude: |
187+
(?x)^
188+
pandas/tests/io/pytables/test_store\.py$
186189
- id: unwanted-patterns
187190
name: Unwanted patterns
188191
language: pygrep
@@ -192,6 +195,20 @@ repos:
192195
\#\ type:\ (?!ignore)
193196
|\#\ type:\s?ignore(?!\[)
194197
198+
# foo.__class__ instead of type(foo)
199+
|\.__class__
200+
201+
# np.bool/np.object instead of np.bool_/np.object_
202+
|np\.bool[^_8`]
203+
|np\.object[^_8`]
204+
205+
# imports from collections.abc instead of `from collections import abc`
206+
|from\ collections\.abc\ import
207+
208+
# Numpy
209+
|from\ numpy\ import\ random
210+
|from\ numpy\.random\ import
211+
195212
# Incorrect code-block / IPython directives
196213
|\.\.\ code-block\ ::
197214
|\.\.\ ipython\ ::
@@ -200,7 +217,17 @@ repos:
200217
201218
# Check for deprecated messages without sphinx directive
202219
|(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)
220+
221+
# {foo!r} instead of {repr(foo)}
222+
|!r}
223+
224+
# builtin filter function
225+
|(?<!def)[\(\s]filter\(
226+
227+
# exec
228+
|[^a-zA-Z0-9_]exec\(
203229
types_or: [python, cython, rst]
230+
exclude: ^doc/source/development/code_style\.rst # contains examples of patterns to avoid
204231
- id: cython-casting
205232
name: Check Cython casting is `<type>obj`, not `<type> obj`
206233
language: pygrep
@@ -231,26 +258,103 @@ repos:
231258
files: ^pandas/tests/extension/base
232259
types: [python]
233260
exclude: ^pandas/tests/extension/base/base\.py
261+
- id: unwanted-patterns-in-tests
262+
name: Unwanted patterns in tests
263+
language: pygrep
264+
entry: |
265+
(?x)
266+
# pytest.xfail instead of pytest.mark.xfail
267+
pytest\.xfail
268+
269+
# imports from pandas._testing instead of `import pandas._testing as tm`
270+
|from\ pandas\._testing\ import
271+
|from\ pandas\ import\ _testing\ as\ tm
272+
273+
# No direct imports from conftest
274+
|conftest\ import
275+
|import\ conftest
276+
277+
# pandas.testing instead of tm
278+
|pd\.testing\.
279+
280+
# pd.api.types instead of from pandas.api.types import ...
281+
|(pd|pandas)\.api\.types\.
282+
283+
# np.testing, np.array_equal
284+
|(numpy|np)(\.testing|\.array_equal)
285+
286+
# unittest.mock (use pytest builtin monkeypatch fixture instead)
287+
|(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)
288+
289+
# pytest raises without context
290+
|\s\ pytest.raises
291+
292+
# pytest.warns (use tm.assert_produces_warning instead)
293+
|pytest\.warns
294+
files: ^pandas/tests/
295+
types_or: [python, cython, rst]
296+
- id: unwanted-patterns-in-ea-tests
297+
name: Unwanted patterns in EA tests
298+
language: pygrep
299+
entry: |
300+
(?x)
301+
tm.assert_(series|frame)_equal
302+
files: ^pandas/tests/extension/base/
303+
exclude: ^pandas/tests/extension/base/base\.py$
304+
types_or: [python, cython, rst]
305+
- id: unwanted-patterns-in-cython
306+
name: Unwanted patterns in Cython code
307+
language: pygrep
308+
entry: |
309+
(?x)
310+
# `<type>obj` as opposed to `<type> obj`
311+
[a-zA-Z0-9*]>[ ]
312+
types: [cython]
234313
- id: pip-to-conda
235314
name: Generate pip dependency from conda
236315
language: python
237316
entry: python scripts/generate_pip_deps_from_conda.py
238317
files: ^(environment.yml|requirements-dev.txt)$
239318
pass_filenames: false
240319
additional_dependencies: [pyyaml, toml]
241-
- id: sync-flake8-versions
242-
name: Check flake8 version is synced across flake8, yesqa, and environment.yml
243-
language: python
244-
entry: python scripts/sync_flake8_versions.py
245-
files: ^(\.pre-commit-config\.yaml|environment\.yml)$
246-
pass_filenames: false
247-
additional_dependencies: [pyyaml, toml]
248320
- id: title-capitalization
249321
name: Validate correct capitalization among titles in documentation
250322
entry: python scripts/validate_rst_title_capitalization.py
251323
language: python
252324
types: [rst]
253325
files: ^doc/source/(development|reference)/
326+
- id: unwanted-patterns-bare-pytest-raises
327+
name: Check for use of bare pytest raises
328+
language: python
329+
entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises"
330+
types: [python]
331+
files: ^pandas/tests/
332+
exclude: ^pandas/tests/extension/
333+
- id: unwanted-patterns-private-function-across-module
334+
name: Check for use of private functions across modules
335+
language: python
336+
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
337+
types: [python]
338+
exclude: ^(asv_bench|pandas/tests|doc)/
339+
- id: unwanted-patterns-private-import-across-module
340+
name: Check for import of private attributes across modules
341+
language: python
342+
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
343+
types: [python]
344+
exclude: |
345+
(?x)
346+
^(asv_bench|pandas/tests|doc)/
347+
|scripts/validate_min_versions_in_sync\.py$
348+
- id: unwanted-patterns-strings-to-concatenate
349+
name: Check for use of not concatenated strings
350+
language: python
351+
entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
352+
types_or: [python, cython]
353+
- id: unwanted-patterns-strings-with-misplaced-whitespace
354+
name: Check for strings with misplaced spaces
355+
language: python
356+
entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
357+
types_or: [python, cython]
254358
- id: use-pd_array-in-core
255359
name: Import pandas.array as pd_array in core
256360
language: python

asv_bench/asv.conf.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
// pip (with all the conda available packages installed first,
4242
// followed by the pip installed packages).
4343
"matrix": {
44-
"numpy": ["1.23.5"], // https://github.com/pandas-dev/pandas/pull/50356
44+
"numpy": [],
4545
"Cython": ["0.29.32"],
4646
"matplotlib": [],
4747
"sqlalchemy": [],

asv_bench/benchmarks/array.py

+9
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ def time_from_integer_array(self):
4444
pd.array(self.values_integer, dtype="Int64")
4545

4646

47+
class IntervalArray:
48+
def setup(self):
49+
N = 10_000
50+
self.tuples = [(i, i + 1) for i in range(N)]
51+
52+
def time_from_tuples(self):
53+
pd.arrays.IntervalArray.from_tuples(self.tuples)
54+
55+
4756
class StringArray:
4857
def setup(self):
4958
N = 100_000

asv_bench/benchmarks/indexing.py

+13
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,19 @@ def time_assign_list_of_columns_concat(self):
476476
concat([self.df, df], axis=1)
477477

478478

479+
class Setitem:
480+
def setup(self):
481+
N = 500_000
482+
cols = 500
483+
self.df = DataFrame(np.random.rand(N, cols))
484+
485+
def time_setitem(self):
486+
self.df[100] = 100
487+
488+
def time_setitem_list(self):
489+
self.df[[100, 200, 300]] = 100
490+
491+
479492
class ChainIndexing:
480493

481494
params = [None, "warn"]

asv_bench/benchmarks/pandas_vb_common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class BaseIO:
7070
def remove(self, f):
7171
"""Remove created files"""
7272
try:
73-
os.remove(f) # noqa: PDF008
73+
os.remove(f)
7474
except OSError:
7575
# On Windows, attempting to remove a file that is in use
7676
# causes an exception to be raised

asv_bench/benchmarks/rolling.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ class Groupby:
292292
["sum", "median", "mean", "max", "min", "kurt", "sum"],
293293
[
294294
("rolling", {"window": 2}),
295-
("rolling", {"window": "30s", "on": "C"}),
295+
("rolling", {"window": "30s"}),
296296
("expanding", {}),
297297
],
298298
)
@@ -304,9 +304,10 @@ def setup(self, method, window_kwargs):
304304
{
305305
"A": [str(i) for i in range(N)] * 10,
306306
"B": list(range(N)) * 10,
307-
"C": pd.date_range(start="1900-01-01", freq="1min", periods=N * 10),
308307
}
309308
)
309+
if isinstance(kwargs.get("window", None), str):
310+
df.index = pd.date_range(start="1900-01-01", freq="1min", periods=N * 10)
310311
self.groupby_window = getattr(df.groupby("A"), window)(**kwargs)
311312

312313
def time_method(self, method, window_kwargs):

asv_bench/benchmarks/series_methods.py

+19
Original file line numberDiff line numberDiff line change
@@ -382,4 +382,23 @@ def time_iter(self, dtype):
382382
pass
383383

384384

385+
class ToNumpy:
386+
def setup(self):
387+
N = 1_000_000
388+
self.ser = Series(
389+
np.random.randn(
390+
N,
391+
)
392+
)
393+
394+
def time_to_numpy(self):
395+
self.ser.to_numpy()
396+
397+
def time_to_numpy_double_copy(self):
398+
self.ser.to_numpy(dtype="float64", copy=True)
399+
400+
def time_to_numpy_copy(self):
401+
self.ser.to_numpy(copy=True)
402+
403+
385404
from .pandas_vb_common import setup # noqa: F401 isort:skip

ci/code_checks.sh

+25
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,31 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8383
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
8484
RET=$(($RET + $?)) ; echo $MSG "DONE"
8585

86+
MSG='Partially validate docstrings (RT02)' ; echo $MSG
87+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=RT02 --ignore_functions \
88+
pandas.Series.align \
89+
pandas.Series.dt.total_seconds \
90+
pandas.Series.cat.rename_categories \
91+
pandas.Series.cat.reorder_categories \
92+
pandas.Series.cat.add_categories \
93+
pandas.Series.cat.remove_categories \
94+
pandas.Series.cat.remove_unused_categories \
95+
pandas.Index.all \
96+
pandas.Index.any \
97+
pandas.MultiIndex.drop \
98+
pandas.DatetimeIndex.to_pydatetime \
99+
pandas.TimedeltaIndex.to_pytimedelta \
100+
pandas.core.groupby.SeriesGroupBy.apply \
101+
pandas.core.groupby.DataFrameGroupBy.apply \
102+
pandas.io.formats.style.Styler.export \
103+
pandas.api.extensions.ExtensionArray.astype \
104+
pandas.api.extensions.ExtensionArray.dropna \
105+
pandas.api.extensions.ExtensionArray.isna \
106+
pandas.api.extensions.ExtensionArray.repeat \
107+
pandas.api.extensions.ExtensionArray.unique \
108+
pandas.DataFrame.align
109+
RET=$(($RET + $?)) ; echo $MSG "DONE"
110+
86111
fi
87112

88113
### DOCUMENTATION NOTEBOOKS ###

0 commit comments

Comments
 (0)