Commit c2ab34c

Merge remote-tracking branch 'upstream/main' into pandas-devgh-49236
2 parents: cff22d4 + 0cdc6a4

File tree

115 files changed: +894 -381 lines


.circleci/config.yml

+4-2
@@ -16,6 +16,7 @@ jobs:
       - run:
           name: Install Environment and Run Tests
           shell: /bin/bash -exo pipefail
+          # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd
           command: |
             MINI_URL="https://github.com/conda-forge/miniforge/releases/download/24.3.0-0/Miniforge3-24.3.0-0-Linux-aarch64.sh"
             wget -q $MINI_URL -O Miniforge3.sh
@@ -33,6 +34,7 @@ jobs:
             fi
             python -m pip install --no-build-isolation -ve . --config-settings=setup-args="--werror"
             PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH
+            sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
             ci/run_tests.sh
   test-linux-musl:
     docker:
@@ -89,8 +91,8 @@ jobs:
           name: Build aarch64 wheels
           no_output_timeout: 30m  # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that
           command: |
-            pip3 install cibuildwheel==2.18.1
-            cibuildwheel --prerelease-pythons --output-dir wheelhouse
+            pip3 install cibuildwheel==2.20.0
+            cibuildwheel --output-dir wheelhouse

       environment:
         CIBW_BUILD: << parameters.cibw-build >>

.github/workflows/code-checks.yml

+5
@@ -51,6 +51,11 @@ jobs:
       # TODO: The doctests have to be run first right now, since the Cython doctests only work
       # with pandas installed in non-editable mode
       # This can be removed once pytest-cython doesn't require C extensions to be installed inplace
+
+      - name: Extra installs
+        # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd
+        run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
+
       - name: Run doctests
         run: cd ci && ./code_checks.sh doctests
         if: ${{ steps.build.outcome == 'success' && always() }}

.github/workflows/docbuild-and-upload.yml

+4
@@ -46,6 +46,10 @@ jobs:
       - name: Build Pandas
         uses: ./.github/actions/build_pandas

+      - name: Extra installs
+        # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd
+        run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
+
       - name: Test website
         run: python -m pytest web/

.github/workflows/unit-tests.yml

+2-2
@@ -134,8 +134,8 @@ jobs:
           fetch-depth: 0

       - name: Extra installs
-        run: sudo apt-get update && sudo apt-get install -y ${{ matrix.extra_apt }}
-        if: ${{ matrix.extra_apt }}
+        # https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd
+        run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0 ${{ matrix.extra_apt || ''}}

       - name: Generate extra locales
         # These extra locales will be available for locale.setlocale() calls in tests

.github/workflows/wheels.yml

+1-2
@@ -158,11 +158,10 @@ jobs:
         run: echo "sdist_name=$(cd ./dist && ls -d */)" >> "$GITHUB_ENV"

       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.2
+        uses: pypa/cibuildwheel@v2.20.0
         with:
           package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
         env:
-          CIBW_PRERELEASE_PYTHONS: True
           CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
           CIBW_BUILD_FRONTEND: ${{ matrix.cibw_build_frontend || 'pip' }}
           CIBW_PLATFORM: ${{ matrix.buildplat[1] == 'pyodide_wasm32' && 'pyodide' || 'auto' }}

ci/code_checks.sh

-12
@@ -70,8 +70,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         --format=actions \
         -i ES01 `# For now it is ok if docstrings are missing the extended summary` \
         -i "pandas.Series.dt PR01" `# Accessors are implemented as classes, but we do not document the Parameters section` \
-        -i "pandas.MultiIndex.get_level_values SA01" \
-        -i "pandas.MultiIndex.get_loc_level PR07" \
         -i "pandas.MultiIndex.names SA01" \
         -i "pandas.MultiIndex.reorder_levels RT03,SA01" \
         -i "pandas.MultiIndex.sortlevel PR07,SA01" \
@@ -165,9 +163,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.str.center RT03,SA01" \
         -i "pandas.Series.str.decode PR07,RT03,SA01" \
         -i "pandas.Series.str.encode PR07,RT03,SA01" \
-        -i "pandas.Series.str.find RT03" \
-        -i "pandas.Series.str.fullmatch RT03" \
-        -i "pandas.Series.str.get RT03,SA01" \
         -i "pandas.Series.str.index RT03" \
         -i "pandas.Series.str.ljust RT03,SA01" \
         -i "pandas.Series.str.lower RT03" \
@@ -177,7 +172,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.str.partition RT03" \
         -i "pandas.Series.str.repeat SA01" \
         -i "pandas.Series.str.replace SA01" \
-        -i "pandas.Series.str.rfind RT03" \
         -i "pandas.Series.str.rindex RT03" \
         -i "pandas.Series.str.rjust RT03,SA01" \
         -i "pandas.Series.str.rpartition RT03" \
@@ -267,9 +261,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.api.extensions.ExtensionArray.tolist RT03,SA01" \
         -i "pandas.api.extensions.ExtensionArray.unique RT03,SA01" \
         -i "pandas.api.extensions.ExtensionArray.view SA01" \
-        -i "pandas.api.indexers.VariableOffsetWindowIndexer PR01,SA01" \
         -i "pandas.api.interchange.from_dataframe RT03,SA01" \
-        -i "pandas.api.types.is_any_real_numeric_dtype SA01" \
         -i "pandas.api.types.is_bool PR01,SA01" \
         -i "pandas.api.types.is_bool_dtype SA01" \
         -i "pandas.api.types.is_categorical_dtype SA01" \
@@ -291,13 +283,9 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.api.types.is_iterator PR07,SA01" \
         -i "pandas.api.types.is_list_like SA01" \
         -i "pandas.api.types.is_named_tuple PR07,SA01" \
-        -i "pandas.api.types.is_numeric_dtype SA01" \
         -i "pandas.api.types.is_object_dtype SA01" \
-        -i "pandas.api.types.is_period_dtype SA01" \
         -i "pandas.api.types.is_re PR07,SA01" \
         -i "pandas.api.types.is_re_compilable PR07,SA01" \
-        -i "pandas.api.types.is_sparse SA01" \
-        -i "pandas.api.types.is_timedelta64_ns_dtype SA01" \
         -i "pandas.api.types.pandas_dtype PR07,RT03,SA01" \
         -i "pandas.arrays.ArrowExtensionArray PR07,SA01" \
         -i "pandas.arrays.BooleanArray SA01" \

doc/source/development/contributing.rst

-1
@@ -74,7 +74,6 @@ If you are new to Git, you can reference some of these resources for learning Git
 to the :ref:`contributor community <community>` for help if needed:

 * `Git documentation <https://git-scm.com/doc>`_.
-* `Numpy's Git resources <https://numpy.org/doc/stable/dev/gitwash/git_resources.html>`_ tutorial.

 Also, the project follows a forking workflow further described on this page whereby
 contributors fork the repository, make changes and then create a pull request.

doc/source/user_guide/basics.rst

-10
@@ -155,16 +155,6 @@ speedups. ``numexpr`` uses smart chunking, caching, and multiple cores. ``bottleneck`` is
 a set of specialized cython routines that are especially fast when dealing with arrays that have
 ``nans``.

-Here is a sample (using 100 column x 100,000 row ``DataFrames``):
-
-.. csv-table::
-    :header: "Operation", "0.11.0 (ms)", "Prior Version (ms)", "Ratio to Prior"
-    :widths: 25, 25, 25, 25
-
-    ``df1 > df2``, 13.32, 125.35, 0.1063
-    ``df1 * df2``, 21.71, 36.63, 0.5928
-    ``df1 + df2``, 22.04, 36.50, 0.6039
-
 You are highly encouraged to install both libraries. See the section
 :ref:`Recommended Dependencies <install.recommended_dependencies>` for more installation info.

doc/source/whatsnew/v3.0.0.rst

+3
@@ -50,6 +50,7 @@ Other enhancements
 - :meth:`DataFrame.pivot_table` and :func:`pivot_table` now allow the passing of keyword arguments to ``aggfunc`` through ``**kwargs`` (:issue:`57884`)
 - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
 - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
+- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
 - Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
@@ -547,6 +548,7 @@ Strings

 Interval
 ^^^^^^^^
+- :meth:`Index.is_monotonic_decreasing`, :meth:`Index.is_monotonic_increasing`, and :meth:`Index.is_unique` could incorrectly be ``False`` for an ``Index`` created from a slice of another ``Index``. (:issue:`57911`)
 - Bug in :func:`interval_range` where start and end numeric types were always cast to 64 bit (:issue:`57268`)
 -
@@ -609,6 +611,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupBy.agg` that raises ``AttributeError`` when there is dictionary input and duplicated columns, instead of returning a DataFrame with the aggregation of all duplicate columns. (:issue:`55041`)
 - Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
 - Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
+- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
 - Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
 - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
 - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
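
Illustrative example (not part of the commit): the GH 58811 note above concerns how ``numeric_only`` reaches the cumulative ops internally; from the user side the parameter behaves like this, a minimal sketch assuming a pandas 3.0 dev build:

    import pandas as pd

    df = pd.DataFrame({"key": [1, 1, 2], "x": [1, 2, 3], "s": list("abc")})

    # numeric_only is now forwarded directly to the cumulative op;
    # with it set, the non-numeric column "s" is excluded (GH 58811).
    out = df.groupby("key").cumsum(numeric_only=True)
    print(out)
    #    x
    # 0  1
    # 1  3
    # 2  3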

pandas/_libs/index.pyx

+32-12
@@ -252,14 +252,24 @@ cdef class IndexEngine:
         return self.sizeof()

     cpdef _update_from_sliced(self, IndexEngine other, reverse: bool):
-        self.unique = other.unique
-        self.need_unique_check = other.need_unique_check
+        if other.unique:
+            self.unique = other.unique
+            self.need_unique_check = other.need_unique_check
+
         if not other.need_monotonic_check and (
                 other.is_monotonic_increasing or other.is_monotonic_decreasing):
-            self.need_monotonic_check = other.need_monotonic_check
-            # reverse=True means the index has been reversed
-            self.monotonic_inc = other.monotonic_dec if reverse else other.monotonic_inc
-            self.monotonic_dec = other.monotonic_inc if reverse else other.monotonic_dec
+            self.need_monotonic_check = 0
+            if len(self.values) > 0 and self.values[0] != self.values[-1]:
+                # reverse=True means the index has been reversed
+                if reverse:
+                    self.monotonic_inc = other.monotonic_dec
+                    self.monotonic_dec = other.monotonic_inc
+                else:
+                    self.monotonic_inc = other.monotonic_inc
+                    self.monotonic_dec = other.monotonic_dec
+            else:
+                self.monotonic_inc = 1
+                self.monotonic_dec = 1

     @property
     def is_unique(self) -> bool:
@@ -882,14 +892,24 @@ cdef class SharedEngine:
         pass

     cpdef _update_from_sliced(self, ExtensionEngine other, reverse: bool):
-        self.unique = other.unique
-        self.need_unique_check = other.need_unique_check
+        if other.unique:
+            self.unique = other.unique
+            self.need_unique_check = other.need_unique_check
+
         if not other.need_monotonic_check and (
                 other.is_monotonic_increasing or other.is_monotonic_decreasing):
-            self.need_monotonic_check = other.need_monotonic_check
-            # reverse=True means the index has been reversed
-            self.monotonic_inc = other.monotonic_dec if reverse else other.monotonic_inc
-            self.monotonic_dec = other.monotonic_inc if reverse else other.monotonic_dec
+            self.need_monotonic_check = 0
+            if len(self.values) > 0 and self.values[0] != self.values[-1]:
+                # reverse=True means the index has been reversed
+                if reverse:
+                    self.monotonic_inc = other.monotonic_dec
+                    self.monotonic_dec = other.monotonic_inc
+                else:
+                    self.monotonic_inc = other.monotonic_inc
+                    self.monotonic_dec = other.monotonic_dec
+            else:
+                self.monotonic_inc = 1
+                self.monotonic_dec = 1

     @property
     def is_unique(self) -> bool:
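
A minimal sketch (not from the commit) of the behavior this change guards: cached uniqueness and monotonicity flags propagate from a parent engine to its slice only when they are actually valid for the slice, which is the GH 57911 fix noted in the whatsnew above.

    import pandas as pd

    idx = pd.Index([1, 2, 3, 4])
    assert idx.is_monotonic_increasing  # caches flags on the engine

    # Slices reuse the parent's cached flags via _update_from_sliced;
    # previously these could incorrectly come back False (GH 57911).
    sliced = idx[1:]
    assert sliced.is_monotonic_increasing
    assert sliced.is_unique

    # reverse=True path: a reversed slice swaps the inc/dec flags.
    assert idx[::-1].is_monotonic_decreasing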

pandas/_libs/lib.pyx

+1-1
@@ -2702,7 +2702,7 @@ def maybe_convert_objects(ndarray[object] objects,
     if using_string_dtype() and is_string_array(objects, skipna=True):
         from pandas.core.arrays.string_ import StringDtype

-        dtype = StringDtype(storage="pyarrow", na_value=np.nan)
+        dtype = StringDtype(na_value=np.nan)
         return dtype.construct_array_type()._from_sequence(objects, dtype=dtype)

     elif convert_to_nullable_dtype and is_string_array(objects, skipna=True):
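
Rough illustration, assuming a pandas 3.0 dev build with the future string option enabled: dropping the hardcoded ``storage="pyarrow"`` lets ``StringDtype`` choose its default storage (pyarrow when installed, otherwise python), so string inference no longer fails when pyarrow is absent.

    import pandas as pd

    pd.set_option("future.infer_string", True)

    # Object input is inferred as the NaN-variant string dtype; the
    # backing storage follows StringDtype's default instead of being
    # forced to pyarrow.
    ser = pd.Series(["a", "b", None])
    print(ser.dtype)  # new string dtype; pyarrow-backed if available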

pandas/_libs/tslibs/offsets.pyx

+6
@@ -491,6 +491,12 @@ cdef class BaseOffset:
         elif is_integer_object(other):
             return type(self)(n=other * self.n, normalize=self.normalize,
                               **self.kwds)
+        elif isinstance(other, BaseOffset):
+            # Otherwise raises RecursionError due to __rmul__
+            raise TypeError(
+                f"Cannot multiply {type(self).__name__} with "
+                f"{type(other).__name__}."
+            )
         return NotImplemented

     def __rmul__(self, other):
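
A short sketch of the new behavior (GH 59442): integer multiplication still scales the offset, while offset * offset now fails fast instead of bouncing between ``__mul__`` and ``__rmul__`` until a ``RecursionError``.

    import pandas as pd

    day = pd.DateOffset(days=1)

    # Integer multiplication scales n as before.
    assert (3 * day).n == 3

    # Offset * offset is now an explicit TypeError.
    try:
        day * pd.DateOffset(months=1)
    except TypeError as err:
        print(err)  # Cannot multiply DateOffset with DateOffset.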

pandas/_testing/__init__.py

+5-1
@@ -12,6 +12,7 @@

 import numpy as np

+from pandas._config import using_string_dtype
 from pandas._config.localization import (
     can_set_locale,
     get_locales,
@@ -106,7 +107,10 @@
 ALL_FLOAT_DTYPES: list[Dtype] = [*FLOAT_NUMPY_DTYPES, *FLOAT_EA_DTYPES]

 COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
-STRING_DTYPES: list[Dtype] = [str, "str", "U"]
+if using_string_dtype():
+    STRING_DTYPES: list[Dtype] = [str, "U"]
+else:
+    STRING_DTYPES: list[Dtype] = [str, "str", "U"]  # type: ignore[no-redef]
 COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES]

 DATETIME64_DTYPES: list[Dtype] = ["datetime64[ns]", "M8[ns]"]

pandas/_testing/asserters.py

+22-1
@@ -796,6 +796,24 @@ def assert_extension_array_equal(
         left_na, right_na, obj=f"{obj} NA mask", index_values=index_values
     )

+    # Specifically for StringArrayNumpySemantics, validate here we have a valid array
+    if (
+        isinstance(left.dtype, StringDtype)
+        and left.dtype.storage == "python"
+        and left.dtype.na_value is np.nan
+    ):
+        assert np.all(
+            [np.isnan(val) for val in left._ndarray[left_na]]  # type: ignore[attr-defined]
+        ), "wrong missing value sentinels"
+    if (
+        isinstance(right.dtype, StringDtype)
+        and right.dtype.storage == "python"
+        and right.dtype.na_value is np.nan
+    ):
+        assert np.all(
+            [np.isnan(val) for val in right._ndarray[right_na]]  # type: ignore[attr-defined]
+        ), "wrong missing value sentinels"
+
     left_valid = left[~left_na].to_numpy(dtype=object)
     right_valid = right[~right_na].to_numpy(dtype=object)
     if check_exact:
@@ -1158,7 +1176,10 @@ def assert_frame_equal(
         Specify how to compare internal data. If False, compare by columns.
         If True, compare by blocks.
     check_exact : bool, default False
-        Whether to compare number exactly.
+        Whether to compare number exactly. If False, the comparison uses the
+        relative tolerance (``rtol``) and absolute tolerance (``atol``)
+        parameters to determine if two values are considered close,
+        according to the formula: ``|a - b| <= (atol + rtol * |b|)``.

         .. versionchanged:: 2.2.0
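
Usage sketch of the tolerance formula the expanded docstring documents (example values are illustrative):

    import pandas as pd
    from pandas.testing import assert_frame_equal

    left = pd.DataFrame({"a": [1.0, 2.0]})
    right = pd.DataFrame({"a": [1.0, 2.0 + 1e-9]})

    # With check_exact=False, values compare as equal whenever
    # |a - b| <= atol + rtol * |b| holds elementwise.
    assert_frame_equal(left, right, check_exact=False, rtol=1e-5, atol=1e-8)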

pandas/compat/__init__.py

+2
@@ -25,6 +25,7 @@
 )
 from pandas.compat.numpy import is_numpy_dev
 from pandas.compat.pyarrow import (
+    HAS_PYARROW,
     pa_version_under10p1,
     pa_version_under11p0,
     pa_version_under13p0,
@@ -156,6 +157,7 @@ def is_ci_environment() -> bool:
     "pa_version_under14p1",
     "pa_version_under16p0",
     "pa_version_under17p0",
+    "HAS_PYARROW",
     "IS64",
     "ISMUSL",
     "PY311",

pandas/compat/pyarrow.py

+2
@@ -17,6 +17,7 @@
     pa_version_under15p0 = _palv < Version("15.0.0")
     pa_version_under16p0 = _palv < Version("16.0.0")
     pa_version_under17p0 = _palv < Version("17.0.0")
+    HAS_PYARROW = True
 except ImportError:
     pa_version_under10p1 = True
     pa_version_under11p0 = True
@@ -27,3 +28,4 @@
     pa_version_under15p0 = True
     pa_version_under16p0 = True
     pa_version_under17p0 = True
+    HAS_PYARROW = False
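
A hypothetical consumer of the new flag (pandas tests usually go through ``td.skip_if_no("pyarrow")``; the decorator below is just an illustration):

    import pytest

    from pandas.compat import HAS_PYARROW

    @pytest.mark.skipif(not HAS_PYARROW, reason="pyarrow not installed")
    def test_requires_pyarrow():
        import pyarrow as pa

        assert pa.array(["a", "b"]).type == pa.string()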

pandas/conftest.py

+4
@@ -1313,6 +1313,7 @@ def string_storage(request):
         ("python", pd.NA),
         pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
         pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
+        ("python", np.nan),
     ]
 )
 def string_dtype_arguments(request):
@@ -1374,12 +1375,14 @@ def object_dtype(request):
         ("python", pd.NA),
         pytest.param(("pyarrow", pd.NA), marks=td.skip_if_no("pyarrow")),
         pytest.param(("pyarrow", np.nan), marks=td.skip_if_no("pyarrow")),
+        ("python", np.nan),
     ],
     ids=[
         "string=object",
         "string=string[python]",
         "string=string[pyarrow]",
         "string=str[pyarrow]",
+        "string=str[python]",
     ],
 )
 def any_string_dtype(request):
@@ -1389,6 +1392,7 @@ def any_string_dtype(request):
     * 'string[python]' (NA variant)
     * 'string[pyarrow]' (NA variant)
     * 'str' (NaN variant, with pyarrow)
+    * 'str' (NaN variant, without pyarrow)
     """
     if isinstance(request.param, np.dtype):
         return request.param
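
Hypothetical test body showing what the widened fixture buys: the new ``("python", np.nan)`` parametrization exercises the NaN-variant ``str`` dtype on machines without pyarrow.

    import pandas as pd
    import pandas._testing as tm

    def test_str_upper(any_string_dtype):
        ser = pd.Series(["a", "b"], dtype=any_string_dtype)
        result = ser.str.upper()
        expected = pd.Series(["A", "B"], dtype=any_string_dtype)
        tm.assert_series_equal(result, expected)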

pandas/core/arrays/arrow/array.py

+4-1
@@ -575,7 +575,10 @@ def __getitem__(self, item: PositionalIndexer):
         if isinstance(item, np.ndarray):
             if not len(item):
                 # Removable once we migrate StringDtype[pyarrow] to ArrowDtype[string]
-                if self._dtype.name == "string" and self._dtype.storage == "pyarrow":
+                if (
+                    isinstance(self._dtype, StringDtype)
+                    and self._dtype.storage == "pyarrow"
+                ):
                     # TODO(infer_string) should this be large_string?
                     pa_dtype = pa.string()
                 else:
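
A minimal sketch of the path being fixed, assuming pyarrow is installed: empty positional indexing must rebuild an empty ``pa.string()`` array, and matching on ``isinstance(..., StringDtype)`` also covers the NaN-variant dtype, whose ``name`` is ``"str"`` rather than ``"string"``.

    import numpy as np
    import pandas as pd

    arr = pd.array(["a", "b"], dtype="string[pyarrow]")

    # An empty ndarray indexer hits the special-cased branch above.
    result = arr[np.array([], dtype=np.intp)]
    assert len(result) == 0
    assert result.dtype == arr.dtype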
