Skip to content

Commit d586f72

Browse files
Merge branch 'main' into main
2 parents d8ad329 + 57a4fb9 commit d586f72

File tree

23 files changed

+233
-62
lines changed

23 files changed

+233
-62
lines changed

ci/code_checks.sh

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7373
-i "pandas.NA SA01" \
7474
-i "pandas.Period.freq GL08" \
7575
-i "pandas.Period.ordinal GL08" \
76-
-i "pandas.Period.strftime PR01,SA01" \
7776
-i "pandas.Period.to_timestamp SA01" \
78-
-i "pandas.PeriodDtype SA01" \
7977
-i "pandas.PeriodDtype.freq SA01" \
80-
-i "pandas.RangeIndex PR07" \
8178
-i "pandas.RangeIndex.from_range PR01,SA01" \
8279
-i "pandas.RangeIndex.start SA01" \
8380
-i "pandas.RangeIndex.step SA01" \
@@ -135,7 +132,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
135132
-i "pandas.Timestamp.value GL08" \
136133
-i "pandas.Timestamp.year GL08" \
137134
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
138-
-i "pandas.api.interchange.from_dataframe RT03,SA01" \
139135
-i "pandas.api.types.is_bool PR01,SA01" \
140136
-i "pandas.api.types.is_categorical_dtype SA01" \
141137
-i "pandas.api.types.is_complex PR01,SA01" \
@@ -222,7 +218,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
222218
-i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
223219
-i "pandas.core.resample.Resampler.var SA01" \
224220
-i "pandas.date_range RT03" \
225-
-i "pandas.errors.AbstractMethodError PR01,SA01" \
226221
-i "pandas.errors.AttributeConflictWarning SA01" \
227222
-i "pandas.errors.CSSWarning SA01" \
228223
-i "pandas.errors.CategoricalConversionWarning SA01" \

doc/source/whatsnew/v2.3.0.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ notable_bug_fix1
5353

5454
Deprecations
5555
~~~~~~~~~~~~
56-
-
56+
- Deprecated allowing non-``bool`` values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` for dtypes that do not already disallow these (:issue:`59615`)
5757
-
5858

5959
.. ---------------------------------------------------------------------------
@@ -103,7 +103,8 @@ Conversion
103103
Strings
104104
^^^^^^^
105105
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
106-
-
106+
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the Python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
107+
107108

108109
Interval
109110
^^^^^^^^

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,7 @@ Performance improvements
503503
- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57542`)
504504
- :meth:`Series.str.partition` with :class:`ArrowDtype` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`57768`)
505505
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
506+
- Performance improvement in :class:`MultiIndex`: setting :attr:`MultiIndex.names` no longer invalidates all cached operations (:issue:`59578`)
506507
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
507508
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
508509
- Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`)

pandas/_libs/tslibs/period.pyx

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2755,6 +2755,27 @@ cdef class _Period(PeriodMixin):
27552755
| ``%%`` | A literal ``'%'`` character. | |
27562756
+-----------+--------------------------------+-------+
27572757

2758+
The `strftime` method provides a way to represent a :class:`Period`
2759+
object as a string in a specified format. This is particularly useful
2760+
when displaying date and time data in different locales or customized
2761+
formats, suitable for reports or user interfaces. It extends the standard
2762+
Python string formatting capabilities with additional directives specific
2763+
to `pandas`, accommodating features like fiscal years and precise
2764+
sub-second components.
2765+
2766+
Parameters
2767+
----------
2768+
fmt : str or None
2769+
String containing the desired format directives. If ``None``, the
2770+
format is determined based on the Period's frequency.
2771+
2772+
See Also
2773+
--------
2774+
Timestamp.strftime : Return a formatted string of the Timestamp.
2775+
to_datetime : Convert argument to datetime.
2776+
time.strftime : Format a time object as a string according to a
2777+
specified format string in the standard Python library.
2778+
27582779
Notes
27592780
-----
27602781

pandas/core/array_algos/quantile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def quantile_with_mask(
9191
if is_empty:
9292
# create the array of na_values
9393
# 2d len(values) * len(qs)
94-
flat = np.array([fill_value] * len(qs))
94+
flat = np.full(len(qs), fill_value)
9595
result = np.repeat(flat, len(values)).reshape(len(values), len(qs))
9696
else:
9797
result = _nanquantile(

pandas/core/arrays/_arrow_string_mixins.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
from __future__ import annotations
22

3+
from functools import partial
34
from typing import (
45
TYPE_CHECKING,
56
Literal,
67
)
78

89
import numpy as np
910

10-
from pandas.compat import pa_version_under10p1
11+
from pandas.compat import (
12+
pa_version_under10p1,
13+
pa_version_under17p0,
14+
)
1115

1216
from pandas.core.dtypes.missing import isna
1317

@@ -49,7 +53,19 @@ def _str_pad(
4953
elif side == "right":
5054
pa_pad = pc.utf8_rpad
5155
elif side == "both":
52-
pa_pad = pc.utf8_center
56+
if pa_version_under17p0:
57+
# GH#59624 fall back to object dtype
58+
from pandas import array
59+
60+
obj_arr = self.astype(object, copy=False) # type: ignore[attr-defined]
61+
obj = array(obj_arr, dtype=object)
62+
result = obj._str_pad(width, side, fillchar) # type: ignore[attr-defined]
63+
return type(self)._from_sequence(result, dtype=self.dtype) # type: ignore[attr-defined]
64+
else:
65+
# GH#54792
66+
# https://github.com/apache/arrow/issues/15053#issuecomment-2317032347
67+
lean_left = (width % 2) == 0
68+
pa_pad = partial(pc.utf8_center, lean_left_on_odd_padding=lean_left)
5369
else:
5470
raise ValueError(
5571
f"Invalid side: {side}. Side must be one of 'left', 'right', 'both'"

pandas/core/arrays/_ranges.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
iNaT,
1919
)
2020

21+
from pandas.core.construction import range_to_ndarray
22+
2123
if TYPE_CHECKING:
2224
from pandas._typing import npt
2325

@@ -82,17 +84,7 @@ def generate_regular_range(
8284
"at least 'start' or 'end' should be specified if a 'period' is given."
8385
)
8486

85-
with np.errstate(over="raise"):
86-
# If the range is sufficiently large, np.arange may overflow
87-
# and incorrectly return an empty array if not caught.
88-
try:
89-
values = np.arange(b, e, stride, dtype=np.int64)
90-
except FloatingPointError:
91-
xdr = [b]
92-
while xdr[-1] != e:
93-
xdr.append(xdr[-1] + stride)
94-
values = np.array(xdr[:-1], dtype=np.int64)
95-
return values
87+
return range_to_ndarray(range(b, e, stride))
9688

9789

9890
def _generate_range_overflow_safe(

pandas/core/arrays/string_arrow.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
TYPE_CHECKING,
77
Union,
88
)
9+
import warnings
910

1011
import numpy as np
1112

@@ -19,6 +20,7 @@
1920
pa_version_under10p1,
2021
pa_version_under13p0,
2122
)
23+
from pandas.util._exceptions import find_stack_level
2224

2325
from pandas.core.dtypes.common import (
2426
is_scalar,
@@ -282,6 +284,7 @@ def astype(self, dtype, copy: bool = True):
282284
_str_map = BaseStringArray._str_map
283285
_str_startswith = ArrowStringArrayMixin._str_startswith
284286
_str_endswith = ArrowStringArrayMixin._str_endswith
287+
_str_pad = ArrowStringArrayMixin._str_pad
285288

286289
def _str_contains(
287290
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
@@ -297,6 +300,14 @@ def _str_contains(
297300
result = pc.match_substring(self._pa_array, pat, ignore_case=not case)
298301
result = self._convert_bool_result(result, na=na)
299302
if not isna(na):
303+
if not isinstance(na, bool):
304+
# GH#59561
305+
warnings.warn(
306+
"Allowing a non-bool 'na' in obj.str.contains is deprecated "
307+
"and will raise in a future version.",
308+
FutureWarning,
309+
stacklevel=find_stack_level(),
310+
)
300311
result[isna(result)] = bool(na)
301312
return result
302313

@@ -536,7 +547,6 @@ class ArrowStringArrayNumpySemantics(ArrowStringArray):
536547
_str_get = ArrowStringArrayMixin._str_get
537548
_str_removesuffix = ArrowStringArrayMixin._str_removesuffix
538549
_str_capitalize = ArrowStringArrayMixin._str_capitalize
539-
_str_pad = ArrowStringArrayMixin._str_pad
540550
_str_title = ArrowStringArrayMixin._str_title
541551
_str_swapcase = ArrowStringArrayMixin._str_swapcase
542552
_str_slice_replace = ArrowStringArrayMixin._str_slice_replace

pandas/core/dtypes/dtypes.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,7 @@ def _hash_categories(self) -> int:
513513
[cat_array, np.arange(len(cat_array), dtype=cat_array.dtype)]
514514
)
515515
else:
516-
cat_array = np.array([cat_array])
516+
cat_array = cat_array.reshape(1, len(cat_array))
517517
combined_hashed = combine_hash_arrays(iter(cat_array), num_items=len(cat_array))
518518
return np.bitwise_xor.reduce(combined_hashed)
519519

@@ -986,6 +986,14 @@ class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype):
986986
-------
987987
None
988988
989+
See Also
990+
--------
991+
Period : Represents a single time period.
992+
PeriodIndex : Immutable index for period data.
993+
date_range : Return a fixed frequency DatetimeIndex.
994+
Series : One-dimensional array with axis labels.
995+
DataFrame : Two-dimensional, size-mutable, potentially heterogeneous tabular data.
996+
989997
Examples
990998
--------
991999
>>> pd.PeriodDtype(freq="D")

pandas/core/groupby/groupby.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4370,11 +4370,12 @@ def post_processor(
43704370

43714371
return vals
43724372

4373-
qs = np.array(q, dtype=np.float64)
4374-
pass_qs: np.ndarray | None = qs
43754373
if is_scalar(q):
43764374
qs = np.array([q], dtype=np.float64)
4377-
pass_qs = None
4375+
pass_qs: None | np.ndarray = None
4376+
else:
4377+
qs = np.asarray(q, dtype=np.float64)
4378+
pass_qs = qs
43784379

43794380
ids = self._grouper.ids
43804381
ngroups = self._grouper.ngroups

pandas/core/indexes/multi.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -799,7 +799,7 @@ def dtypes(self) -> Series:
799799
"""
800800
from pandas import Series
801801

802-
names = com.fill_missing_names([level.name for level in self.levels])
802+
names = com.fill_missing_names(self.names)
803803
return Series([level.dtype for level in self.levels], index=Index(names))
804804

805805
def __len__(self) -> int:
@@ -1572,7 +1572,7 @@ def _format_multi(
15721572
def _get_names(self) -> FrozenList:
15731573
return FrozenList(self._names)
15741574

1575-
def _set_names(self, names, *, level=None, validate: bool = True) -> None:
1575+
def _set_names(self, names, *, level=None) -> None:
15761576
"""
15771577
Set new names on index. Each name has to be a hashable type.
15781578
@@ -1583,8 +1583,6 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None:
15831583
level : int, level name, or sequence of int/level names (default None)
15841584
If the index is a MultiIndex (hierarchical), level(s) to set (None
15851585
for all levels). Otherwise level must be None
1586-
validate : bool, default True
1587-
validate that the names match level lengths
15881586
15891587
Raises
15901588
------
@@ -1603,13 +1601,12 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None:
16031601
raise ValueError("Names should be list-like for a MultiIndex")
16041602
names = list(names)
16051603

1606-
if validate:
1607-
if level is not None and len(names) != len(level):
1608-
raise ValueError("Length of names must match length of level.")
1609-
if level is None and len(names) != self.nlevels:
1610-
raise ValueError(
1611-
"Length of names must match number of levels in MultiIndex."
1612-
)
1604+
if level is not None and len(names) != len(level):
1605+
raise ValueError("Length of names must match length of level.")
1606+
if level is None and len(names) != self.nlevels:
1607+
raise ValueError(
1608+
"Length of names must match number of levels in MultiIndex."
1609+
)
16131610

16141611
if level is None:
16151612
level = range(self.nlevels)
@@ -1627,8 +1624,9 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None:
16271624
)
16281625
self._names[lev] = name
16291626

1630-
# If .levels has been accessed, the names in our cache will be stale.
1631-
self._reset_cache()
1627+
# If .levels has been accessed, the .name of each level in our cache
1628+
# will be stale.
1629+
self._reset_cache("levels")
16321630

16331631
names = property(
16341632
fset=_set_names,
@@ -2686,9 +2684,9 @@ def _get_codes_for_sorting(self) -> list[Categorical]:
26862684
a valid value
26872685
"""
26882686

2689-
def cats(level_codes):
2687+
def cats(level_codes: np.ndarray) -> np.ndarray:
26902688
return np.arange(
2691-
np.array(level_codes).max() + 1 if len(level_codes) else 0,
2689+
level_codes.max() + 1 if len(level_codes) else 0,
26922690
dtype=level_codes.dtype,
26932691
)
26942692

pandas/core/indexes/range.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,9 @@ class RangeIndex(Index):
9090
start : int (default: 0), range, or other RangeIndex instance
9191
If int and "stop" is not given, interpreted as "stop" instead.
9292
stop : int (default: 0)
93+
The end value of the range (exclusive).
9394
step : int (default: 1)
95+
The step size of the range.
9496
dtype : np.int64
9597
Unused, accepted for homogeneity with other index types.
9698
copy : bool, default False

pandas/core/interchange/from_dataframe.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,13 @@ def from_dataframe(df, allow_copy: bool = True) -> pd.DataFrame:
6060
Returns
6161
-------
6262
pd.DataFrame
63+
A pandas DataFrame built from the provided interchange
64+
protocol object.
65+
66+
See Also
67+
--------
68+
pd.DataFrame : DataFrame class which can be created from various input data
69+
formats, including objects that support the interchange protocol.
6370
6471
Examples
6572
--------

pandas/core/strings/object_array.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99
cast,
1010
)
1111
import unicodedata
12+
import warnings
1213

1314
import numpy as np
1415

1516
from pandas._libs import lib
1617
import pandas._libs.missing as libmissing
1718
import pandas._libs.ops as libops
19+
from pandas.util._exceptions import find_stack_level
1820

1921
from pandas.core.dtypes.missing import isna
2022

@@ -142,14 +144,38 @@ def _str_contains(
142144
else:
143145
upper_pat = pat.upper()
144146
f = lambda x: upper_pat in x.upper()
147+
if not isna(na) and not isinstance(na, bool):
148+
# GH#59561
149+
warnings.warn(
150+
"Allowing a non-bool 'na' in obj.str.contains is deprecated "
151+
"and will raise in a future version.",
152+
FutureWarning,
153+
stacklevel=find_stack_level(),
154+
)
145155
return self._str_map(f, na, dtype=np.dtype("bool"))
146156

147157
def _str_startswith(self, pat, na=None):
148158
f = lambda x: x.startswith(pat)
159+
if not isna(na) and not isinstance(na, bool):
160+
# GH#59561
161+
warnings.warn(
162+
"Allowing a non-bool 'na' in obj.str.startswith is deprecated "
163+
"and will raise in a future version.",
164+
FutureWarning,
165+
stacklevel=find_stack_level(),
166+
)
149167
return self._str_map(f, na_value=na, dtype=np.dtype(bool))
150168

151169
def _str_endswith(self, pat, na=None):
152170
f = lambda x: x.endswith(pat)
171+
if not isna(na) and not isinstance(na, bool):
172+
# GH#59561
173+
warnings.warn(
174+
"Allowing a non-bool 'na' in obj.str.endswith is deprecated "
175+
"and will raise in a future version.",
176+
FutureWarning,
177+
stacklevel=find_stack_level(),
178+
)
153179
return self._str_map(f, na_value=na, dtype=np.dtype(bool))
154180

155181
def _str_replace(

0 commit comments

Comments
 (0)