Skip to content

Commit 6eab1f2

Browse files
committed
Merge branch 'main' into pandas-devgh-10446
1 parent 052ec72 commit 6eab1f2

File tree

153 files changed

+1404
-611
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

153 files changed

+1404
-611
lines changed

.github/workflows/unit-tests.yml

+13-19
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ jobs:
107107

108108
services:
109109
mysql:
110-
image: mysql:8
110+
image: mysql:9
111111
env:
112112
MYSQL_ALLOW_EMPTY_PASSWORD: yes
113113
MYSQL_DATABASE: pandas
@@ -120,7 +120,7 @@ jobs:
120120
- 3306:3306
121121

122122
postgres:
123-
image: postgres:16
123+
image: postgres:17
124124
env:
125125
PGUSER: postgres
126126
POSTGRES_USER: postgres
@@ -135,7 +135,7 @@ jobs:
135135
- 5432:5432
136136

137137
moto:
138-
image: motoserver/moto:5.0.0
138+
image: motoserver/moto:5.0.27
139139
env:
140140
AWS_ACCESS_KEY_ID: foobar_key
141141
AWS_SECRET_ACCESS_KEY: foobar_secret
@@ -242,15 +242,14 @@ jobs:
242242
- name: Build environment and Run Tests
243243
# https://github.com/numpy/numpy/issues/24703#issuecomment-1722379388
244244
run: |
245-
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
245+
/opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev
246246
. ~/virtualenvs/pandas-dev/bin/activate
247247
python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1
248248
python -m pip install numpy -Csetup-args="-Dallow-noblas=true"
249249
python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0
250250
python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror"
251251
python -m pip list --no-cache-dir
252-
export PANDAS_CI=1
253-
python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
252+
PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
254253
concurrency:
255254
# https://github.community/t/concurrecy-not-work-for-push/183068/7
256255
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-32bit
@@ -259,7 +258,7 @@ jobs:
259258
Linux-Musl:
260259
runs-on: ubuntu-22.04
261260
container:
262-
image: quay.io/pypa/musllinux_1_1_x86_64
261+
image: quay.io/pypa/musllinux_1_2_x86_64
263262
steps:
264263
- name: Checkout pandas Repo
265264
# actions/checkout does not work since it requires node
@@ -281,7 +280,7 @@ jobs:
281280
apk add musl-locales
282281
- name: Build environment
283282
run: |
284-
/opt/python/cp311-cp311/bin/python -m venv ~/virtualenvs/pandas-dev
283+
/opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev
285284
. ~/virtualenvs/pandas-dev/bin/activate
286285
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
287286
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytest>=7.3.2 pytest-xdist>=3.4.0 hypothesis>=6.84.0
@@ -291,8 +290,7 @@ jobs:
291290
- name: Run Tests
292291
run: |
293292
. ~/virtualenvs/pandas-dev/bin/activate
294-
export PANDAS_CI=1
295-
python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
293+
PANDAS_CI=1 python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
296294
concurrency:
297295
# https://github.community/t/concurrecy-not-work-for-push/183068/7
298296
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-musl
@@ -357,8 +355,7 @@ jobs:
357355
python --version
358356
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
359357
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
360-
python -m pip install versioneer[toml]
361-
python -m pip install python-dateutil tzdata cython hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov
358+
python -m pip install versioneer[toml] python-dateutil tzdata cython hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov
362359
python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror"
363360
python -m pip list
364361
@@ -375,7 +372,7 @@ jobs:
375372

376373
concurrency:
377374
# https://github.community/t/concurrecy-not-work-for-push/183068/7
378-
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.os }}-python-freethreading-dev
375+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-python-freethreading-dev
379376
cancel-in-progress: true
380377

381378
env:
@@ -396,14 +393,11 @@ jobs:
396393
nogil: true
397394

398395
- name: Build Environment
399-
# TODO: Once numpy 2.2.1 is out, don't install nightly version
400-
# Tests segfault with numpy 2.2.0: https://github.com/numpy/numpy/pull/27955
401396
run: |
402397
python --version
403-
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
404-
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython numpy
405-
python -m pip install versioneer[toml]
406-
python -m pip install python-dateutil pytz tzdata hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov
398+
python -m pip install --upgrade pip setuptools wheel numpy meson[ninja]==1.2.1 meson-python==0.13.1
399+
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple cython
400+
python -m pip install versioneer[toml] python-dateutil pytz tzdata hypothesis>=6.84.0 pytest>=7.3.2 pytest-xdist>=3.4.0 pytest-cov
407401
python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror"
408402
python -m pip list
409403

.pre-commit-config.yaml

+5-5
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.8.6
22+
rev: v0.9.4
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
@@ -41,7 +41,7 @@ repos:
4141
pass_filenames: true
4242
require_serial: false
4343
- repo: https://github.com/codespell-project/codespell
44-
rev: v2.3.0
44+
rev: v2.4.1
4545
hooks:
4646
- id: codespell
4747
types_or: [python, rst, markdown, cython, c]
@@ -70,7 +70,7 @@ repos:
7070
- id: trailing-whitespace
7171
args: [--markdown-linebreak-ext=md]
7272
- repo: https://github.com/PyCQA/isort
73-
rev: 5.13.2
73+
rev: 6.0.0
7474
hooks:
7575
- id: isort
7676
- repo: https://github.com/asottile/pyupgrade
@@ -95,14 +95,14 @@ repos:
9595
- id: sphinx-lint
9696
args: ["--enable", "all", "--disable", "line-too-long"]
9797
- repo: https://github.com/pre-commit/mirrors-clang-format
98-
rev: v19.1.6
98+
rev: v19.1.7
9999
hooks:
100100
- id: clang-format
101101
files: ^pandas/_libs/src|^pandas/_libs/include
102102
args: [-i]
103103
types_or: [c, c++]
104104
- repo: https://github.com/trim21/pre-commit-mirror-meson
105-
rev: v1.6.1
105+
rev: v1.7.0
106106
hooks:
107107
- id: meson-fmt
108108
args: ['--inplace']

asv_bench/benchmarks/io/style.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ class Render:
1313
def setup(self, cols, rows):
1414
self.df = DataFrame(
1515
np.random.randn(rows, cols),
16-
columns=[f"float_{i+1}" for i in range(cols)],
17-
index=[f"row_{i+1}" for i in range(rows)],
16+
columns=[f"float_{i + 1}" for i in range(cols)],
17+
index=[f"row_{i + 1}" for i in range(rows)],
1818
)
1919

2020
def time_apply_render(self, cols, rows):

doc/make.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,7 @@ def latex(self, force=False):
260260
for i in range(3):
261261
self._run_os("pdflatex", "-interaction=nonstopmode", "pandas.tex")
262262
raise SystemExit(
263-
"You should check the file "
264-
'"build/latex/pandas.pdf" for problems.'
263+
'You should check the file "build/latex/pandas.pdf" for problems.'
265264
)
266265
self._run_os("make")
267266
return ret_code
@@ -343,8 +342,7 @@ def main():
343342
dest="verbosity",
344343
default=0,
345344
help=(
346-
"increase verbosity (can be repeated), "
347-
"passed to the sphinx build command"
345+
"increase verbosity (can be repeated), passed to the sphinx build command"
348346
),
349347
)
350348
argparser.add_argument(

doc/source/user_guide/cookbook.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,7 @@ Timeseries
874874
<https://stackoverflow.com/questions/13893227/vectorized-look-up-of-values-in-pandas-dataframe>`__
875875

876876
`Aggregation and plotting time series
877-
<https://nipunbatra.github.io/blog/visualisation/2013/05/01/aggregation-timeseries.html>`__
877+
<https://nipunbatra.github.io/blog/posts/2013-05-01-aggregation-timeseries.html>`__
878878

879879
Turn a matrix with hours in columns and days in rows into a continuous row sequence in the form of a time series.
880880
`How to rearrange a Python pandas DataFrame?
@@ -1043,7 +1043,7 @@ CSV
10431043

10441044
The :ref:`CSV <io.read_csv_table>` docs
10451045

1046-
`read_csv in action <https://wesmckinney.com/blog/update-on-upcoming-pandas-v0-10-new-file-parser-other-performance-wins/>`__
1046+
`read_csv in action <https://www.datacamp.com/tutorial/pandas-read-csv>`__
10471047

10481048
`appending to a csv
10491049
<https://stackoverflow.com/questions/17134942/pandas-dataframe-output-end-of-csv>`__

doc/source/user_guide/enhancingperf.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ prefer that Numba throw an error if it cannot compile a function in a way that
427427
speeds up your code, pass Numba the argument
428428
``nopython=True`` (e.g. ``@jit(nopython=True)``). For more on
429429
troubleshooting Numba modes, see the `Numba troubleshooting page
430-
<https://numba.pydata.org/numba-doc/latest/user/troubleshoot.html#the-compiled-code-is-too-slow>`__.
430+
<https://numba.readthedocs.io/en/stable/user/troubleshoot.html>`__.
431431

432432
Using ``parallel=True`` (e.g. ``@jit(parallel=True)``) may result in a ``SIGABRT`` if the threading layer leads to unsafe
433433
behavior. You can first `specify a safe threading layer <https://numba.readthedocs.io/en/stable/user/threading-layer.html#selecting-a-threading-layer-for-safe-parallel-execution>`__

doc/source/user_guide/groupby.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ You can also include the grouping columns if you want to operate on them.
418418
419419
.. note::
420420

421-
The ``groupby`` operation in Pandas drops the ``name`` field of the columns Index object
421+
The ``groupby`` operation in pandas drops the ``name`` field of the columns Index object
422422
after the operation. This change ensures consistency in syntax between different
423423
column selection methods within groupby operations.
424424

doc/source/user_guide/merging.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -586,7 +586,7 @@ A string argument to ``indicator`` will use the value as the name for the indica
586586
Overlapping value columns
587587
~~~~~~~~~~~~~~~~~~~~~~~~~
588588

589-
The merge ``suffixes`` argument takes a tuple of list of strings to append to
589+
The merge ``suffixes`` argument takes a tuple or list of strings to append to
590590
overlapping column names in the input :class:`DataFrame` to disambiguate the result
591591
columns:
592592

@@ -979,7 +979,7 @@ nearest key rather than equal keys. For each row in the ``left`` :class:`DataFra
979979
the last row in the ``right`` :class:`DataFrame` are selected where the ``on`` key is less
980980
than the left's key. Both :class:`DataFrame` must be sorted by the key.
981981

982-
Optionally an :func:`merge_asof` can perform a group-wise merge by matching the
982+
Optionally :func:`merge_asof` can perform a group-wise merge by matching the
983983
``by`` key in addition to the nearest match on the ``on`` key.
984984

985985
.. ipython:: python

doc/source/user_guide/pyarrow.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ Data Structure Integration
2222

2323
A :class:`Series`, :class:`Index`, or the columns of a :class:`DataFrame` can be directly backed by a :external+pyarrow:py:class:`pyarrow.ChunkedArray`
2424
which is similar to a NumPy array. To construct these from the main pandas data structures, you can pass in a string of the type followed by
25-
``[pyarrow]``, e.g. ``"int64[pyarrow]""`` into the ``dtype`` parameter
25+
``[pyarrow]``, e.g. ``"int64[pyarrow]"`` into the ``dtype`` parameter
2626

2727
.. ipython:: python
2828

doc/source/user_guide/scale.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Scaling to large datasets
55
*************************
66

77
pandas provides data structures for in-memory analytics, which makes using pandas
8-
to analyze datasets that are larger than memory datasets somewhat tricky. Even datasets
8+
to analyze datasets that are larger than memory somewhat tricky. Even datasets
99
that are a sizable fraction of memory become unwieldy, as some pandas operations need
1010
to make intermediate copies.
1111

doc/source/user_guide/style.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1288,7 +1288,7 @@
12881288
"outputs": [],
12891289
"source": [
12901290
"df2.loc[:4].style.highlight_max(\n",
1291-
" axis=1, props=(\"color:white; \" \"font-weight:bold; \" \"background-color:darkblue;\")\n",
1291+
" axis=1, props=(\"color:white; font-weight:bold; background-color:darkblue;\")\n",
12921292
")"
12931293
]
12941294
},

doc/source/user_guide/timeseries.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -1580,7 +1580,7 @@ the pandas objects.
15801580
ts = ts[:5]
15811581
ts.shift(1)
15821582
1583-
The ``shift`` method accepts an ``freq`` argument which can accept a
1583+
The ``shift`` method accepts a ``freq`` argument which can accept a
15841584
``DateOffset`` class or other ``timedelta``-like object or also an
15851585
:ref:`offset alias <timeseries.offset_aliases>`.
15861586

@@ -2570,7 +2570,7 @@ because daylight savings time (DST) in a local time zone causes some times to oc
25702570
twice within one day ("clocks fall back"). The following options are available:
25712571

25722572
* ``'raise'``: Raises a ``ValueError`` (the default behavior)
2573-
* ``'infer'``: Attempt to determine the correct offset base on the monotonicity of the timestamps
2573+
* ``'infer'``: Attempt to determine the correct offset based on the monotonicity of the timestamps
25742574
* ``'NaT'``: Replaces ambiguous times with ``NaT``
25752575
* ``bool``: ``True`` represents a DST time, ``False`` represents non-DST time. An array-like of ``bool`` values is supported for a sequence of times.
25762576

doc/source/whatsnew/v3.0.0.rst

+6-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Other enhancements
3535
- :class:`pandas.api.typing.NoDefault` is available for typing ``no_default``
3636
- :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
3737
- :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`)
38+
- :func:`pandas.merge`, :meth:`DataFrame.merge` and :meth:`DataFrame.join` now support anti joins (``left_anti`` and ``right_anti``) in the ``how`` parameter (:issue:`42916`)
3839
- :func:`read_spss` now supports kwargs to be passed to pyreadstat (:issue:`56356`)
3940
- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
4041
- :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`).
@@ -59,15 +60,16 @@ Other enhancements
5960
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
6061
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
6162
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
63+
- :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` methods ``sum``, ``mean``, ``median``, ``prod``, ``min``, ``max``, ``std``, ``var`` and ``sem`` now accept ``skipna`` parameter (:issue:`15675`)
6264
- :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`)
6365
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
64-
- :meth:`.DataFrameGroupBy.mean`, :meth:`.DataFrameGroupBy.sum`, :meth:`.SeriesGroupBy.mean` and :meth:`.SeriesGroupBy.sum` now accept ``skipna`` parameter (:issue:`15675`)
6566
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
6667
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
6768
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
6869
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
6970
- :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
7071
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
72+
- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
7173
- Implemented :meth:`Series.str.isascii` and :meth:`Index.str.isascii` (:issue:`59091`)
7274
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
7375
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
@@ -631,6 +633,7 @@ Datetimelike
631633
- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56147`)
632634
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
633635
- Bug in :func:`tseries.frequencies.to_offset` would fail to parse frequency strings starting with "LWOM" (:issue:`59218`)
636+
- Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` casting ``datetime64`` and ``timedelta64`` columns to ``float64`` and losing precision (:issue:`60850`)
634637
- Bug in :meth:`DataFrame.agg` with a :class:`DataFrame` with missing values resulting in an ``IndexError`` (:issue:`58810`)
635638
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` not raising on Custom business days frequencies bigger than "1C" (:issue:`58664`)
636639
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`)
@@ -766,6 +769,7 @@ Reshaping
766769
- Bug in :meth:`DataFrame.unstack` producing incorrect results when ``sort=False`` (:issue:`54987`, :issue:`55516`)
767770
- Bug in :meth:`DataFrame.merge` when merging two :class:`DataFrame` on ``intc`` or ``uintc`` types on Windows (:issue:`60091`, :issue:`58713`)
768771
- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
772+
- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
769773
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
770774

771775
Sparse
@@ -789,6 +793,7 @@ Styler
789793
Other
790794
^^^^^
791795
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
796+
- Bug in :class:`Series` ignoring errors when trying to convert :class:`Series` input data to the given ``dtype`` (:issue:`60728`)
792797
- Bug in :func:`eval` on :class:`ExtensionArray` on including division ``/`` failed with a ``TypeError``. (:issue:`58748`)
793798
- Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`)
794799
- Bug in :func:`eval` with ``engine="numexpr"`` returning unexpected result for float division. (:issue:`59736`)

0 commit comments

Comments
 (0)