Skip to content

Commit 09c573d

Browse files
committed
Merge branch 'main' of https://github.com/pandas-dev/pandas into add-meson-build-files
2 parents 9e1ccc2 + 18865cf commit 09c573d

File tree

174 files changed

+3398
-1058
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

174 files changed

+3398
-1058
lines changed

.github/workflows/codeql.yml

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ concurrency:
88
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
99
cancel-in-progress: true
1010

11+
permissions:
12+
contents: read
13+
1114
jobs:
1215
analyze:
1316
runs-on: ubuntu-22.04

.github/workflows/wheels.yml

+3
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ concurrency:
3030
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
3131
cancel-in-progress: true
3232

33+
permissions:
34+
contents: read
35+
3336
jobs:
3437
build_wheels:
3538
name: Build wheel for ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

.pre-commit-config.yaml

+128-9
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ repos:
3434
types_or: [python, rst, markdown]
3535
additional_dependencies: [tomli]
3636
- repo: https://github.com/MarcoGorelli/cython-lint
37-
rev: v0.9.1
37+
rev: v0.10.1
3838
hooks:
3939
- id: cython-lint
4040
- id: double-quote-cython-strings
@@ -70,19 +70,16 @@ repos:
7070
rev: 6.0.0
7171
hooks:
7272
- id: flake8
73-
# Need to patch os.remove rule in pandas-dev-flaker
74-
exclude: ^ci/fix_wheels.py
7573
additional_dependencies: &flake8_dependencies
7674
- flake8==6.0.0
7775
- flake8-bugbear==22.7.1
78-
- pandas-dev-flaker==0.5.0
7976
- repo: https://github.com/pycqa/pylint
80-
rev: v2.15.6
77+
rev: v2.15.9
8178
hooks:
8279
- id: pylint
8380
stages: [manual]
8481
- repo: https://github.com/pycqa/pylint
85-
rev: v2.15.6
82+
rev: v2.15.9
8683
hooks:
8784
- id: pylint
8885
alias: redefined-outer-name
@@ -95,15 +92,14 @@ repos:
9592
|^pandas/util/_test_decorators\.py # keep excluded
9693
|^pandas/_version\.py # keep excluded
9794
|^pandas/conftest\.py # keep excluded
98-
|^pandas/core/generic\.py
9995
args: [--disable=all, --enable=redefined-outer-name]
10096
stages: [manual]
10197
- repo: https://github.com/PyCQA/isort
102-
rev: 5.10.1
98+
rev: 5.11.4
10399
hooks:
104100
- id: isort
105101
- repo: https://github.com/asottile/pyupgrade
106-
rev: v3.2.2
102+
rev: v3.3.1
107103
hooks:
108104
- id: pyupgrade
109105
args: [--py38-plus]
@@ -184,6 +180,21 @@ repos:
184180
types: [rst]
185181
args: [--filename=*.rst]
186182
additional_dependencies: [flake8-rst==0.7.0, flake8==3.7.9]
183+
- id: inconsistent-namespace-usage
184+
name: 'Check for inconsistent use of pandas namespace'
185+
entry: python scripts/check_for_inconsistent_pandas_namespace.py
186+
exclude: ^pandas/core/interchange/
187+
language: python
188+
types: [python]
189+
- id: no-os-remove
190+
name: Check code for instances of os.remove
191+
entry: os\.remove
192+
language: pygrep
193+
types: [python]
194+
files: ^pandas/tests/
195+
exclude: |
196+
(?x)^
197+
pandas/tests/io/pytables/test_store\.py$
187198
- id: unwanted-patterns
188199
name: Unwanted patterns
189200
language: pygrep
@@ -193,6 +204,20 @@ repos:
193204
\#\ type:\ (?!ignore)
194205
|\#\ type:\s?ignore(?!\[)
195206
207+
# foo.__class__ instead of type(foo)
208+
|\.__class__
209+
210+
# np.bool/np.object instead of np.bool_/np.object_
211+
|np\.bool[^_8`]
212+
|np\.object[^_8`]
213+
214+
# imports from collections.abc instead of `from collections import abc`
215+
|from\ collections\.abc\ import
216+
217+
# Numpy
218+
|from\ numpy\ import\ random
219+
|from\ numpy\.random\ import
220+
196221
# Incorrect code-block / IPython directives
197222
|\.\.\ code-block\ ::
198223
|\.\.\ ipython\ ::
@@ -201,7 +226,17 @@ repos:
201226
202227
# Check for deprecated messages without sphinx directive
203228
|(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)
229+
230+
# {foo!r} instead of {repr(foo)}
231+
|!r}
232+
233+
# builtin filter function
234+
|(?<!def)[\(\s]filter\(
235+
236+
# exec
237+
|[^a-zA-Z0-9_]exec\(
204238
types_or: [python, cython, rst]
239+
exclude: ^doc/source/development/code_style\.rst # contains examples of patterns to avoid
205240
- id: cython-casting
206241
name: Check Cython casting is `<type>obj`, not `<type> obj`
207242
language: pygrep
@@ -232,6 +267,58 @@ repos:
232267
files: ^pandas/tests/extension/base
233268
types: [python]
234269
exclude: ^pandas/tests/extension/base/base\.py
270+
- id: unwanted-patterns-in-tests
271+
name: Unwanted patterns in tests
272+
language: pygrep
273+
entry: |
274+
(?x)
275+
# pytest.xfail instead of pytest.mark.xfail
276+
pytest\.xfail
277+
278+
# imports from pandas._testing instead of `import pandas._testing as tm`
279+
|from\ pandas\._testing\ import
280+
|from\ pandas\ import\ _testing\ as\ tm
281+
282+
# No direct imports from conftest
283+
|conftest\ import
284+
|import\ conftest
285+
286+
# pandas.testing instead of tm
287+
|pd\.testing\.
288+
289+
# pd.api.types instead of from pandas.api.types import ...
290+
|(pd|pandas)\.api\.types\.
291+
292+
# np.testing, np.array_equal
293+
|(numpy|np)(\.testing|\.array_equal)
294+
295+
# unittest.mock (use pytest builtin monkeypatch fixture instead)
296+
|(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)
297+
298+
# pytest raises without context
299+
|\s\ pytest.raises
300+
301+
# pytest.warns (use tm.assert_produces_warning instead)
302+
|pytest\.warns
303+
files: ^pandas/tests/
304+
types_or: [python, cython, rst]
305+
- id: unwanted-patterns-in-ea-tests
306+
name: Unwanted patterns in EA tests
307+
language: pygrep
308+
entry: |
309+
(?x)
310+
tm.assert_(series|frame)_equal
311+
files: ^pandas/tests/extension/base/
312+
exclude: ^pandas/tests/extension/base/base\.py$
313+
types_or: [python, cython, rst]
314+
- id: unwanted-patterns-in-cython
315+
name: Unwanted patterns in Cython code
316+
language: pygrep
317+
entry: |
318+
(?x)
319+
# `<type>obj` as opposed to `<type> obj`
320+
[a-zA-Z0-9*]>[ ]
321+
types: [cython]
235322
- id: pip-to-conda
236323
name: Generate pip dependency from conda
237324
language: python
@@ -252,6 +339,38 @@ repos:
252339
language: python
253340
types: [rst]
254341
files: ^doc/source/(development|reference)/
342+
- id: unwanted-patterns-bare-pytest-raises
343+
name: Check for use of bare pytest raises
344+
language: python
345+
entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises"
346+
types: [python]
347+
files: ^pandas/tests/
348+
exclude: ^pandas/tests/extension/
349+
- id: unwanted-patterns-private-function-across-module
350+
name: Check for use of private functions across modules
351+
language: python
352+
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
353+
types: [python]
354+
exclude: ^(asv_bench|pandas/tests|doc)/
355+
- id: unwanted-patterns-private-import-across-module
356+
name: Check for import of private attributes across modules
357+
language: python
358+
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
359+
types: [python]
360+
exclude: |
361+
(?x)
362+
^(asv_bench|pandas/tests|doc)/
363+
|scripts/validate_min_versions_in_sync\.py$
364+
- id: unwanted-patterns-strings-to-concatenate
365+
name: Check for use of not concatenated strings
366+
language: python
367+
entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_to_concatenate"
368+
types_or: [python, cython]
369+
- id: unwanted-patterns-strings-with-misplaced-whitespace
370+
name: Check for strings with misplaced spaces
371+
language: python
372+
entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
373+
types_or: [python, cython]
255374
- id: use-pd_array-in-core
256375
name: Import pandas.array as pd_array in core
257376
language: python

asv_bench/benchmarks/array.py

+9
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ def time_from_integer_array(self):
4444
pd.array(self.values_integer, dtype="Int64")
4545

4646

47+
class IntervalArray:
48+
def setup(self):
49+
N = 10_000
50+
self.tuples = [(i, i + 1) for i in range(N)]
51+
52+
def time_from_tuples(self):
53+
pd.arrays.IntervalArray.from_tuples(self.tuples)
54+
55+
4756
class StringArray:
4857
def setup(self):
4958
N = 100_000

asv_bench/benchmarks/pandas_vb_common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class BaseIO:
7070
def remove(self, f):
7171
"""Remove created files"""
7272
try:
73-
os.remove(f) # noqa: PDF008
73+
os.remove(f)
7474
except OSError:
7575
# On Windows, attempting to remove a file that is in use
7676
# causes an exception to be raised

asv_bench/benchmarks/rolling.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ class Groupby:
292292
["sum", "median", "mean", "max", "min", "kurt", "sum"],
293293
[
294294
("rolling", {"window": 2}),
295-
("rolling", {"window": "30s", "on": "C"}),
295+
("rolling", {"window": "30s"}),
296296
("expanding", {}),
297297
],
298298
)
@@ -304,9 +304,10 @@ def setup(self, method, window_kwargs):
304304
{
305305
"A": [str(i) for i in range(N)] * 10,
306306
"B": list(range(N)) * 10,
307-
"C": pd.date_range(start="1900-01-01", freq="1min", periods=N * 10),
308307
}
309308
)
309+
if isinstance(kwargs.get("window", None), str):
310+
df.index = pd.date_range(start="1900-01-01", freq="1min", periods=N * 10)
310311
self.groupby_window = getattr(df.groupby("A"), window)(**kwargs)
311312

312313
def time_method(self, method, window_kwargs):

asv_bench/benchmarks/series_methods.py

+19
Original file line numberDiff line numberDiff line change
@@ -382,4 +382,23 @@ def time_iter(self, dtype):
382382
pass
383383

384384

385+
class ToNumpy:
386+
def setup(self):
387+
N = 1_000_000
388+
self.ser = Series(
389+
np.random.randn(
390+
N,
391+
)
392+
)
393+
394+
def time_to_numpy(self):
395+
self.ser.to_numpy()
396+
397+
def time_to_numpy_double_copy(self):
398+
self.ser.to_numpy(dtype="float64", copy=True)
399+
400+
def time_to_numpy_copy(self):
401+
self.ser.to_numpy(copy=True)
402+
403+
385404
from .pandas_vb_common import setup # noqa: F401 isort:skip

ci/deps/actions-310.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ dependencies:
4848
- pyxlsb
4949
- s3fs>=2021.08.0
5050
- scipy
51-
- sqlalchemy
51+
- sqlalchemy<1.4.46
5252
- tabulate
5353
- tzdata>=2022a
5454
- xarray

ci/deps/actions-38-downstream_compat.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ dependencies:
4848
- pyxlsb
4949
- s3fs>=2021.08.0
5050
- scipy
51-
- sqlalchemy
51+
- sqlalchemy<1.4.46
5252
- tabulate
5353
- xarray
5454
- xlrd

ci/deps/actions-38.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ dependencies:
4848
- pyxlsb
4949
- s3fs>=2021.08.0
5050
- scipy
51-
- sqlalchemy
51+
- sqlalchemy<1.4.46
5252
- tabulate
5353
- xarray
5454
- xlrd

ci/deps/actions-39.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ dependencies:
4848
- pyxlsb
4949
- s3fs>=2021.08.0
5050
- scipy
51-
- sqlalchemy
51+
- sqlalchemy<1.4.46
5252
- tabulate
5353
- tzdata>=2022a
5454
- xarray

ci/deps/circle-38-arm64.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ dependencies:
4949
- pyxlsb
5050
- s3fs>=2021.08.0
5151
- scipy
52-
- sqlalchemy
52+
- sqlalchemy<1.4.46
5353
- tabulate
5454
- xarray
5555
- xlrd

doc/scripts/eval_performance.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@
66
from pandas import DataFrame
77

88
setup_common = """from pandas import DataFrame
9-
from numpy.random import randn
10-
df = DataFrame(randn(%d, 3), columns=list('abc'))
9+
df = DataFrame(np.random.randn(%d, 3), columns=list('abc'))
1110
%s"""
1211

1312
setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'"

doc/source/development/contributing_codebase.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ Pre-commit
4343
----------
4444

4545
Additionally, :ref:`Continuous Integration <contributing.ci>` will run code formatting checks
46-
like ``black``, ``flake8`` (including a `pandas-dev-flaker <https://github.com/pandas-dev/pandas-dev-flaker>`_ plugin),
46+
like ``black``, ``flake8``,
4747
``isort``, ``cpplint``, and more using `pre-commit hooks <https://pre-commit.com/>`_.
4848
Any warnings from these checks will cause the :ref:`Continuous Integration <contributing.ci>` to fail; therefore,
4949
it is helpful to run the check yourself before submitting code. This

doc/source/reference/arrays.rst

+31
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,37 @@ is an :class:`ArrowDtype`.
6060
`Pyarrow <https://arrow.apache.org/docs/python/index.html>`__ provides similar array and `data type <https://arrow.apache.org/docs/python/api/datatypes.html>`__
6161
support as NumPy, including first-class nullability support for all data types, immutability, and more.
6262

63+
The table below shows the equivalent pyarrow-backed (``pa``), pandas extension, and numpy (``np``) types that are recognized by pandas.
64+
PyArrow-backed types below need to be passed into :class:`ArrowDtype` to be recognized by pandas, e.g. ``pd.ArrowDtype(pa.bool_())``.
65+
66+
=============================================== ========================== ===================
67+
PyArrow type pandas extension type NumPy type
68+
=============================================== ========================== ===================
69+
:external+pyarrow:py:func:`pyarrow.bool_` :class:`BooleanDtype` ``np.bool_``
70+
:external+pyarrow:py:func:`pyarrow.int8` :class:`Int8Dtype` ``np.int8``
71+
:external+pyarrow:py:func:`pyarrow.int16` :class:`Int16Dtype` ``np.int16``
72+
:external+pyarrow:py:func:`pyarrow.int32` :class:`Int32Dtype` ``np.int32``
73+
:external+pyarrow:py:func:`pyarrow.int64` :class:`Int64Dtype` ``np.int64``
74+
:external+pyarrow:py:func:`pyarrow.uint8` :class:`UInt8Dtype` ``np.uint8``
75+
:external+pyarrow:py:func:`pyarrow.uint16` :class:`UInt16Dtype` ``np.uint16``
76+
:external+pyarrow:py:func:`pyarrow.uint32` :class:`UInt32Dtype` ``np.uint32``
77+
:external+pyarrow:py:func:`pyarrow.uint64` :class:`UInt64Dtype` ``np.uint64``
78+
:external+pyarrow:py:func:`pyarrow.float32` :class:`Float32Dtype` ``np.float32``
79+
:external+pyarrow:py:func:`pyarrow.float64` :class:`Float64Dtype` ``np.float64``
80+
:external+pyarrow:py:func:`pyarrow.time32` (none) (none)
81+
:external+pyarrow:py:func:`pyarrow.time64` (none) (none)
82+
:external+pyarrow:py:func:`pyarrow.timestamp` :class:`DatetimeTZDtype` ``np.datetime64``
83+
:external+pyarrow:py:func:`pyarrow.date32` (none) (none)
84+
:external+pyarrow:py:func:`pyarrow.date64` (none) (none)
85+
:external+pyarrow:py:func:`pyarrow.duration` (none) ``np.timedelta64``
86+
:external+pyarrow:py:func:`pyarrow.binary` (none) (none)
87+
:external+pyarrow:py:func:`pyarrow.string` :class:`StringDtype` ``np.str_``
88+
:external+pyarrow:py:func:`pyarrow.decimal128` (none) (none)
89+
:external+pyarrow:py:func:`pyarrow.list_` (none) (none)
90+
:external+pyarrow:py:func:`pyarrow.map_` (none) (none)
91+
:external+pyarrow:py:func:`pyarrow.dictionary` :class:`CategoricalDtype` (none)
92+
=============================================== ========================== ===================
93+
6394
.. note::
6495

6596
For string types (``pyarrow.string()``, ``string[pyarrow]``), PyArrow support is still facilitated

0 commit comments

Comments
 (0)