Skip to content

Commit c59703d

Browse files
committed
Merge branch 'main' into pandas-asan
2 parents 6442066 + 99e6897 commit c59703d

File tree

116 files changed

+1424
-910
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

116 files changed

+1424
-910
lines changed

.github/workflows/codeql.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ jobs:
2828

2929
steps:
3030
- uses: actions/checkout@v4
31-
- uses: github/codeql-action/init@v2
31+
- uses: github/codeql-action/init@v3
3232
with:
3333
languages: ${{ matrix.language }}
34-
- uses: github/codeql-action/autobuild@v2
35-
- uses: github/codeql-action/analyze@v2
34+
- uses: github/codeql-action/autobuild@v3
35+
- uses: github/codeql-action/analyze@v3

.github/workflows/docbuild-and-upload.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ jobs:
8585
run: mv doc/build/html web/build/docs
8686

8787
- name: Save website as an artifact
88-
uses: actions/upload-artifact@v3
88+
uses: actions/upload-artifact@v4
8989
with:
9090
name: website
9191
path: web/build

.github/workflows/wheels.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
python -m pip install build
6363
python -m build --sdist
6464
65-
- uses: actions/upload-artifact@v3
65+
- uses: actions/upload-artifact@v4
6666
with:
6767
name: sdist
6868
path: ./dist/*
@@ -115,7 +115,7 @@ jobs:
115115
# removes unnecessary files from the release
116116
- name: Download sdist (not macOS)
117117
#if: ${{ matrix.buildplat[1] != 'macosx_*' }}
118-
uses: actions/download-artifact@v3
118+
uses: actions/download-artifact@v4
119119
with:
120120
name: sdist
121121
path: ./dist
@@ -189,7 +189,7 @@ jobs:
189189
docker pull python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }}
190190
docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD
191191
192-
- uses: actions/upload-artifact@v3
192+
- uses: actions/upload-artifact@v4
193193
with:
194194
name: ${{ matrix.python[0] }}-${{ startsWith(matrix.buildplat[1], 'macosx') && 'macosx' || matrix.buildplat[1] }}
195195
path: ./wheelhouse/*.whl

.pre-commit-config.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -132,11 +132,11 @@ repos:
132132
types: [python]
133133
stages: [manual]
134134
additional_dependencies: &pyright_dependencies
135-
135+
136136
- id: pyright
137137
# note: assumes python env is setup and activated
138138
name: pyright reportGeneralTypeIssues
139-
entry: pyright --skipunannotated -p pyright_reportGeneralTypeIssues.json --level warning
139+
entry: pyright -p pyright_reportGeneralTypeIssues.json --level warning
140140
language: node
141141
pass_filenames: false
142142
types: [python]

asv_bench/benchmarks/join_merge.py

+24-14
Original file line numberDiff line numberDiff line change
@@ -275,18 +275,21 @@ def time_merge_dataframes_cross(self, sort):
275275

276276
class MergeEA:
277277
params = [
278-
"Int64",
279-
"Int32",
280-
"Int16",
281-
"UInt64",
282-
"UInt32",
283-
"UInt16",
284-
"Float64",
285-
"Float32",
278+
[
279+
"Int64",
280+
"Int32",
281+
"Int16",
282+
"UInt64",
283+
"UInt32",
284+
"UInt16",
285+
"Float64",
286+
"Float32",
287+
],
288+
[True, False],
286289
]
287-
param_names = ["dtype"]
290+
param_names = ["dtype", "monotonic"]
288291

289-
def setup(self, dtype):
292+
def setup(self, dtype, monotonic):
290293
N = 10_000
291294
indices = np.arange(1, N)
292295
key = np.tile(indices[:8000], 10)
@@ -299,8 +302,11 @@ def setup(self, dtype):
299302
"value2": np.random.randn(7999),
300303
}
301304
)
305+
if monotonic:
306+
self.left = self.left.sort_values("key")
307+
self.right = self.right.sort_values("key")
302308

303-
def time_merge(self, dtype):
309+
def time_merge(self, dtype, monotonic):
304310
merge(self.left, self.right)
305311

306312

@@ -330,10 +336,11 @@ class MergeDatetime:
330336
("ns", "ms"),
331337
],
332338
[None, "Europe/Brussels"],
339+
[True, False],
333340
]
334-
param_names = ["units", "tz"]
341+
param_names = ["units", "tz", "monotonic"]
335342

336-
def setup(self, units, tz):
343+
def setup(self, units, tz, monotonic):
337344
unit_left, unit_right = units
338345
N = 10_000
339346
keys = Series(date_range("2012-01-01", freq="min", periods=N, tz=tz))
@@ -349,8 +356,11 @@ def setup(self, units, tz):
349356
"value2": np.random.randn(8000),
350357
}
351358
)
359+
if monotonic:
360+
self.left = self.left.sort_values("key")
361+
self.right = self.right.sort_values("key")
352362

353-
def time_merge(self, units, tz):
363+
def time_merge(self, units, tz, monotonic):
354364
merge(self.left, self.right)
355365

356366

doc/source/conf.py

-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@
5858
"numpydoc",
5959
"sphinx_copybutton",
6060
"sphinx_design",
61-
"sphinx_toggleprompt",
6261
"sphinx.ext.autodoc",
6362
"sphinx.ext.autosummary",
6463
"sphinx.ext.coverage",

doc/source/reference/groupby.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
=======
66
GroupBy
77
=======
8-
.. currentmodule:: pandas.api.typing
8+
.. currentmodule:: pandas.core.groupby
99

1010
:class:`pandas.api.typing.DataFrameGroupBy` and :class:`pandas.api.typing.SeriesGroupBy`
1111
instances are returned by groupby calls :func:`pandas.DataFrame.groupby` and
@@ -40,7 +40,7 @@ Function application helper
4040

4141
NamedAgg
4242

43-
.. currentmodule:: pandas.api.typing
43+
.. currentmodule:: pandas.core.groupby
4444

4545
Function application
4646
--------------------

doc/source/reference/resampling.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
==========
66
Resampling
77
==========
8-
.. currentmodule:: pandas.api.typing
8+
.. currentmodule:: pandas.core.resample
99

1010
:class:`pandas.api.typing.Resampler` instances are returned by
1111
resample calls: :func:`pandas.DataFrame.resample`, :func:`pandas.Series.resample`.

doc/source/reference/window.rst

+4-4
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ calls: :func:`pandas.DataFrame.ewm` and :func:`pandas.Series.ewm`.
1717

1818
Rolling window functions
1919
------------------------
20-
.. currentmodule:: pandas.api.typing
20+
.. currentmodule:: pandas.core.window.rolling
2121

2222
.. autosummary::
2323
:toctree: api/
@@ -44,7 +44,7 @@ Rolling window functions
4444

4545
Weighted window functions
4646
-------------------------
47-
.. currentmodule:: pandas.api.typing
47+
.. currentmodule:: pandas.core.window.rolling
4848

4949
.. autosummary::
5050
:toctree: api/
@@ -58,7 +58,7 @@ Weighted window functions
5858

5959
Expanding window functions
6060
--------------------------
61-
.. currentmodule:: pandas.api.typing
61+
.. currentmodule:: pandas.core.window.expanding
6262

6363
.. autosummary::
6464
:toctree: api/
@@ -85,7 +85,7 @@ Expanding window functions
8585

8686
Exponentially-weighted window functions
8787
---------------------------------------
88-
.. currentmodule:: pandas.api.typing
88+
.. currentmodule:: pandas.core.window.ewm
8989

9090
.. autosummary::
9191
:toctree: api/

doc/source/whatsnew/v0.21.0.rst

+11-6
Original file line numberDiff line numberDiff line change
@@ -635,17 +635,22 @@ Previous behavior:
635635
636636
New behavior:
637637

638-
.. ipython:: python
638+
.. code-block:: ipython
639639
640-
pi = pd.period_range('2017-01', periods=12, freq='M')
640+
In [1]: pi = pd.period_range('2017-01', periods=12, freq='M')
641641
642-
s = pd.Series(np.arange(12), index=pi)
642+
In [2]: s = pd.Series(np.arange(12), index=pi)
643643
644-
resampled = s.resample('2Q').mean()
644+
In [3]: resampled = s.resample('2Q').mean()
645645
646-
resampled
646+
In [4]: resampled
647+
Out[4]:
648+
2017Q1 2.5
649+
2017Q3 8.5
650+
Freq: 2Q-DEC, dtype: float64
647651
648-
resampled.index
652+
In [5]: resampled.index
653+
Out[5]: PeriodIndex(['2017Q1', '2017Q3'], dtype='period[2Q-DEC]')
649654
650655
Upsampling and calling ``.ohlc()`` previously returned a ``Series``, basically identical to calling ``.asfreq()``. OHLC upsampling now returns a DataFrame with columns ``open``, ``high``, ``low`` and ``close`` (:issue:`13083`). This is consistent with downsampling and ``DatetimeIndex`` behavior.
651656

doc/source/whatsnew/v2.2.0.rst

+13-4
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ Other enhancements
229229
- :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`)
230230
- :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
231231
- :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
232+
- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area`` (:issue:`56492`)
232233
- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`)
233234
- :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
234235
- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
@@ -345,6 +346,8 @@ Optional libraries below the lowest tested version may still work, but are not c
345346
+-----------------+-----------------+---------+
346347
| Package | Minimum Version | Changed |
347348
+=================+=================+=========+
349+
| mypy (dev) | 1.7.1 | X |
350+
+-----------------+-----------------+---------+
348351
| | | X |
349352
+-----------------+-----------------+---------+
350353

@@ -354,8 +357,8 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor
354357

355358
Other API changes
356359
^^^^^^^^^^^^^^^^^
360+
- The hash values of nullable extension dtypes changed to improve the performance of the hashing operation (:issue:`56507`)
357361
- ``check_exact`` now only takes effect for floating-point dtypes in :func:`testing.assert_frame_equal` and :func:`testing.assert_series_equal`. In particular, integer dtypes are always checked exactly (:issue:`55882`)
358-
-
359362

360363
.. ---------------------------------------------------------------------------
361364
.. _whatsnew_220.deprecations:
@@ -441,6 +444,7 @@ Other Deprecations
441444
- Deprecated :meth:`.DataFrameGroupBy.fillna` and :meth:`.SeriesGroupBy.fillna`; use :meth:`.DataFrameGroupBy.ffill`, :meth:`.DataFrameGroupBy.bfill` for forward and backward filling or :meth:`.DataFrame.fillna` to fill with a single value (or the Series equivalents) (:issue:`55718`)
442445
- Deprecated :meth:`Index.format`, use ``index.astype(str)`` or ``index.map(formatter)`` instead (:issue:`55413`)
443446
- Deprecated :meth:`Series.ravel`, the underlying array is already 1D, so ravel is not necessary (:issue:`52511`)
447+
- Deprecated :meth:`Series.resample` and :meth:`DataFrame.resample` with a :class:`PeriodIndex` (and the 'convention' keyword), convert to :class:`DatetimeIndex` (with ``.to_timestamp()``) before resampling instead (:issue:`53481`)
444448
- Deprecated :meth:`Series.view`, use :meth:`Series.astype` instead to change the dtype (:issue:`20251`)
445449
- Deprecated ``core.internals`` members ``Block``, ``ExtensionBlock``, and ``DatetimeTZBlock``, use public APIs instead (:issue:`55139`)
446450
- Deprecated ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`)
@@ -476,7 +480,7 @@ Other Deprecations
476480
- Deprecated strings ``H``, ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
477481
- Deprecated strings ``H``, ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
478482
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
479-
- Deprecated the :class:`.BaseGrouper` attributes ``group_keys_seq`` and ``reconstructed_codes``; these will be removed in a future version of pandas (:issue:`56148`)
483+
- Deprecated the :attr:`.DataFrameGroupBy.grouper` and :attr:`.SeriesGroupBy.grouper`; these attributes will be removed in a future version of pandas (:issue:`56521`)
480484
- Deprecated the :class:`.Grouping` attributes ``group_index``, ``result_index``, and ``group_arraylike``; these will be removed in a future version of pandas (:issue:`56148`)
481485
- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`)
482486
- Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`)
@@ -498,6 +502,7 @@ Performance improvements
498502
- Performance improvement in :func:`.testing.assert_frame_equal` and :func:`.testing.assert_series_equal` (:issue:`55949`, :issue:`55971`)
499503
- Performance improvement in :func:`concat` with ``axis=1`` and objects with unaligned indexes (:issue:`55084`)
500504
- Performance improvement in :func:`get_dummies` (:issue:`56089`)
505+
- Performance improvement in :func:`merge` and :func:`merge_ordered` when joining on sorted ascending keys (:issue:`56115`)
501506
- Performance improvement in :func:`merge_asof` when ``by`` is not ``None`` (:issue:`55580`, :issue:`55678`)
502507
- Performance improvement in :func:`read_stata` for files with many variables (:issue:`55515`)
503508
- Performance improvement in :meth:`DataFrame.groupby` when aggregating pyarrow timestamp and duration dtypes (:issue:`55031`)
@@ -514,6 +519,7 @@ Performance improvements
514519
- Performance improvement in :meth:`Series.value_counts` and :meth:`Series.mode` for masked dtypes (:issue:`54984`, :issue:`55340`)
515520
- Performance improvement in :meth:`.DataFrameGroupBy.nunique` and :meth:`.SeriesGroupBy.nunique` (:issue:`55972`)
516521
- Performance improvement in :meth:`.SeriesGroupBy.idxmax`, :meth:`.SeriesGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.DataFrameGroupBy.idxmin` (:issue:`54234`)
522+
- Performance improvement when hashing a nullable extension array (:issue:`56507`)
517523
- Performance improvement when indexing into a non-unique index (:issue:`55816`)
518524
- Performance improvement when indexing with more than 4 keys (:issue:`54550`)
519525
- Performance improvement when localizing time to UTC (:issue:`55241`)
@@ -658,11 +664,11 @@ Groupby/resample/rolling
658664
- Bug in :meth:`.DataFrameGroupBy.value_counts` and :meth:`.SeriesGroupBy.value_counts` would sort by proportions rather than frequencies when ``sort=True`` and ``normalize=True`` (:issue:`55951`)
659665
- Bug in :meth:`DataFrame.asfreq` and :meth:`Series.asfreq` with a :class:`DatetimeIndex` with non-nanosecond resolution incorrectly converting to nanosecond resolution (:issue:`55958`)
660666
- Bug in :meth:`DataFrame.ewm` when passed ``times`` with non-nanosecond ``datetime64`` or :class:`DatetimeTZDtype` dtype (:issue:`56262`)
667+
- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` where grouping by a combination of ``Decimal`` and NA values would fail when ``sort=True`` (:issue:`54847`)
661668
- Bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
662669
- Bug in :meth:`DataFrame.resample` when resampling on a :class:`ArrowDtype` of ``pyarrow.timestamp`` or ``pyarrow.duration`` type (:issue:`55989`)
663670
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
664671
- Bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
665-
-
666672

667673
Reshaping
668674
^^^^^^^^^
@@ -671,10 +677,12 @@ Reshaping
671677
- Bug in :func:`merge_asof` raising ``TypeError`` when ``by`` dtype is not ``object``, ``int64``, or ``uint64`` (:issue:`22794`)
672678
- Bug in :func:`merge_asof` raising incorrect error for string dtype (:issue:`56444`)
673679
- Bug in :func:`merge_asof` when using a :class:`Timedelta` tolerance on a :class:`ArrowDtype` column (:issue:`56486`)
680+
- Bug in :func:`merge` not raising when merging string columns with numeric columns (:issue:`56441`)
674681
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
675682
- Bug in :meth:`DataFrame.melt` where an exception was raised if ``var_name`` was not a string (:issue:`55948`)
676683
- Bug in :meth:`DataFrame.melt` where it would not preserve the datetime (:issue:`55254`)
677684
- Bug in :meth:`DataFrame.pivot_table` where the row margin is incorrect when the columns have numeric names (:issue:`26568`)
685+
- Bug in :meth:`DataFrame.pivot` with numeric columns and extension dtype for data (:issue:`56528`)
678686

679687
Sparse
680688
^^^^^^
@@ -699,9 +707,10 @@ Other
699707
- Bug in :func:`infer_freq` and :meth:`DatetimeIndex.inferred_freq` with weekly frequencies and non-nanosecond resolutions (:issue:`55609`)
700708
- Bug in :meth:`DataFrame.apply` where passing ``raw=True`` ignored ``args`` passed to the applied function (:issue:`55009`)
701709
- Bug in :meth:`DataFrame.from_dict` which would always sort the rows of the created :class:`DataFrame` (:issue:`55683`)
710+
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` raising a ``ValueError`` (:issue:`56478`)
702711
- Bug in rendering ``inf`` values inside a :class:`DataFrame` with the ``use_inf_as_na`` option enabled (:issue:`55483`)
703712
- Bug in rendering a :class:`Series` with a :class:`MultiIndex` when one of the index level's names is 0 not having that name displayed (:issue:`55415`)
704-
- Bug in the error message when assigning an empty dataframe to a column (:issue:`55956`)
713+
- Bug in the error message when assigning an empty :class:`DataFrame` to a column (:issue:`55956`)
705714
- Bug when time-like strings were being cast to :class:`ArrowDtype` with ``pyarrow.time64`` type (:issue:`56463`)
706715

707716
.. ***DO NOT USE THIS SECTION***

environment.yml

+6-7
Original file line numberDiff line numberDiff line change
@@ -68,17 +68,17 @@ dependencies:
6868
- flask
6969

7070
# benchmarks
71-
- asv>=0.5.1
71+
- asv>=0.6.1
7272

7373
## The compiler packages are meta-packages and install the correct compiler (activation) packages on the respective platforms.
7474
- c-compiler
7575
- cxx-compiler
7676

7777
# code checks
78-
- flake8=6.0.0 # run in subprocess over docstring examples
79-
- mypy=1.4.1 # pre-commit uses locally installed mypy
78+
- flake8=6.1.0 # run in subprocess over docstring examples
79+
- mypy=1.7.1 # pre-commit uses locally installed mypy
8080
- tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py
81-
- pre-commit>=2.15.0
81+
- pre-commit>=3.6.0
8282

8383
# documentation
8484
- gitpython # obtain contributors from git for whatsnew
@@ -98,12 +98,12 @@ dependencies:
9898
- types-setuptools
9999

100100
# documentation (jupyter notebooks)
101-
- nbconvert>=6.4.5
101+
- nbconvert>=7.11.0
102102
- nbsphinx
103103
- pandoc
104104
- ipywidgets
105105
- nbformat
106-
- notebook>=6.0.3
106+
- notebook>=7.0.6
107107
- ipykernel
108108

109109
# web
@@ -118,6 +118,5 @@ dependencies:
118118
- adbc-driver-postgresql>=0.8.0
119119
- adbc-driver-sqlite>=0.8.0
120120
- dataframe-api-compat>=0.1.7
121-
- sphinx-toggleprompt # conda-forge version has stricter pins on jinja2
122121
- typing_extensions; python_version<"3.11"
123122
- tzdata>=2022.7

pandas/_config/config.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ def get_default_val(pat: str):
220220
class DictWrapper:
221221
"""provide attribute-style access to a nested dict"""
222222

223+
d: dict[str, Any]
224+
223225
def __init__(self, d: dict[str, Any], prefix: str = "") -> None:
224226
object.__setattr__(self, "d", d)
225227
object.__setattr__(self, "prefix", prefix)
@@ -250,7 +252,7 @@ def __getattr__(self, key: str):
250252
else:
251253
return _get_option(prefix)
252254

253-
def __dir__(self) -> Iterable[str]:
255+
def __dir__(self) -> list[str]:
254256
return list(self.d.keys())
255257

256258

0 commit comments

Comments
 (0)