Skip to content

Commit 618f66f

Browse files
committed
API: str.center with pyarrow-backed string dtype
1 parent 1b7bfed commit 618f66f

File tree

4 files changed

+13
-8
lines changed

4 files changed

+13
-8
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ Other API changes
276276
- Made ``dtype`` a required argument in :meth:`ExtensionArray._from_sequence_of_strings` (:issue:`56519`)
277277
- Passing a :class:`Series` input to :func:`json_normalize` will now retain the :class:`Series` :class:`Index`, previously output had a new :class:`RangeIndex` (:issue:`51452`)
278278
- Removed :meth:`Index.sort`, which always raised a ``TypeError``. This attribute is no longer defined and will now raise an ``AttributeError`` (:issue:`59283`)
279+
- The ``center`` method on the ``str`` accessors of :class:`Series` and :class:`Index` objects with pyarrow-backed dtype now matches the Python behavior in corner cases with an odd number of fill characters when using pyarrow versions 17.0 and above (:issue:`54792`)
279280
- Updated :meth:`DataFrame.to_excel` so that the output spreadsheet has no styling. Custom styling can still be done using :meth:`Styler.to_excel` (:issue:`54154`)
280281
- pickle and HDF (``.h5``) files created with Python 2 are no longer explicitly supported (:issue:`57387`)
281282
- pickled objects from pandas version less than ``1.0.0`` are no longer supported (:issue:`57155`)

pandas/core/arrays/_arrow_string_mixins.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
from __future__ import annotations
22

3+
from functools import partial
34
from typing import (
45
TYPE_CHECKING,
56
Literal,
67
)
78

89
import numpy as np
910

10-
from pandas.compat import pa_version_under10p1
11+
from pandas.compat import (
12+
pa_version_under10p1,
13+
pa_version_under17p0,
14+
)
1115

1216
if not pa_version_under10p1:
1317
import pyarrow as pa
@@ -34,7 +38,11 @@ def _str_pad(
3438
elif side == "right":
3539
pa_pad = pc.utf8_rpad
3640
elif side == "both":
37-
pa_pad = pc.utf8_center
41+
if pa_version_under17p0:
42+
pa_pad = pc.utf8_center
43+
else:
44+
# GH#54792
45+
pa_pad = partial(pc.utf8_center, lean_left_on_odd_padding=False)
3846
else:
3947
raise ValueError(
4048
f"Invalid side: {side}. Side must be one of 'left', 'right', 'both'"

pandas/core/arrays/string_arrow.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ def astype(self, dtype, copy: bool = True):
280280
# String methods interface
281281

282282
_str_map = BaseStringArray._str_map
283+
_str_pad = ArrowStringArrayMixin._str_pad
283284

284285
def _str_contains(
285286
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
@@ -574,7 +575,6 @@ class ArrowStringArrayNumpySemantics(ArrowStringArray):
574575
_str_get = ArrowStringArrayMixin._str_get
575576
_str_removesuffix = ArrowStringArrayMixin._str_removesuffix
576577
_str_capitalize = ArrowStringArrayMixin._str_capitalize
577-
_str_pad = ArrowStringArrayMixin._str_pad
578578
_str_title = ArrowStringArrayMixin._str_title
579579
_str_swapcase = ArrowStringArrayMixin._str_swapcase
580580
_str_slice_replace = ArrowStringArrayMixin._str_slice_replace

pandas/tests/strings/test_case_justify.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -291,11 +291,7 @@ def test_center_ljust_rjust_mixed_object():
291291

292292

293293
def test_center_ljust_rjust_fillchar(any_string_dtype):
294-
if any_string_dtype == "string[pyarrow_numpy]":
295-
pytest.skip(
296-
"Arrow logic is different, "
297-
"see https://github.com/pandas-dev/pandas/pull/54533/files#r1299808126",
298-
)
294+
# GH#54533, GH#54792
299295
s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
300296

301297
result = s.str.center(5, fillchar="X")

0 commit comments

Comments
 (0)