Skip to content

Commit 48796a7

Browse files
authored
Merge branch 'main' into issue54938-struct-accessor
2 parents 3797729 + 876d785 commit 48796a7

File tree

7 files changed

+49
-14
lines changed

7 files changed

+49
-14
lines changed

doc/source/whatsnew/v2.1.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Fixed regression in :func:`read_csv` when ``delim_whitespace`` is True (:issue:`54918`, :issue:`54931`)
2020
- Fixed regression in :meth:`.GroupBy.get_group` raising for ``axis=1`` (:issue:`54858`)
2121
- Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`)
22+
- Fixed regression in :meth:`DataFrame.filter` not respecting the order of the elements passed in ``items`` (:issue:`54980`)
2223
- Fixed regression in :meth:`DataFrame.to_sql` not roundtripping datetime columns correctly for sqlite (:issue:`54877`)
2324
- Fixed regression in :meth:`MultiIndex.append` raising when appending overlapping :class:`IntervalIndex` levels (:issue:`54934`)
2425
- Fixed regression in :meth:`Series.drop_duplicates` for PyArrow strings (:issue:`54904`)

pandas/core/arrays/arrow/array.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -2192,11 +2192,11 @@ def _str_rstrip(self, to_strip=None):
21922192
return type(self)(result)
21932193

21942194
def _str_removeprefix(self, prefix: str):
2195-
# TODO: Should work once https://github.com/apache/arrow/issues/14991 is fixed
2196-
# starts_with = pc.starts_with(self._pa_array, pattern=prefix)
2197-
# removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
2198-
# result = pc.if_else(starts_with, removed, self._pa_array)
2199-
# return type(self)(result)
2195+
if not pa_version_under13p0:
2196+
starts_with = pc.starts_with(self._pa_array, pattern=prefix)
2197+
removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
2198+
result = pc.if_else(starts_with, removed, self._pa_array)
2199+
return type(self)(result)
22002200
predicate = lambda val: val.removeprefix(prefix)
22012201
result = self._apply_elementwise(predicate)
22022202
return type(self)(pa.chunked_array(result))

pandas/core/arrays/string_arrow.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@
1515
lib,
1616
missing as libmissing,
1717
)
18-
from pandas.compat import pa_version_under7p0
18+
from pandas.compat import (
19+
pa_version_under7p0,
20+
pa_version_under13p0,
21+
)
1922
from pandas.util._exceptions import find_stack_level
2023

2124
from pandas.core.dtypes.common import (
@@ -446,6 +449,20 @@ def _str_rstrip(self, to_strip=None):
446449
result = pc.utf8_rtrim(self._pa_array, characters=to_strip)
447450
return type(self)(result)
448451

452+
def _str_removeprefix(self, prefix: str):
453+
if not pa_version_under13p0:
454+
starts_with = pc.starts_with(self._pa_array, pattern=prefix)
455+
removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
456+
result = pc.if_else(starts_with, removed, self._pa_array)
457+
return type(self)(result)
458+
return super()._str_removeprefix(prefix)
459+
460+
def _str_removesuffix(self, suffix: str):
461+
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
462+
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
463+
result = pc.if_else(ends_with, removed, self._pa_array)
464+
return type(self)(result)
465+
449466
def _str_count(self, pat: str, flags: int = 0):
450467
if flags:
451468
return super()._str_count(pat, flags)

pandas/core/generic.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -5718,10 +5718,12 @@ def filter(
57185718

57195719
if items is not None:
57205720
name = self._get_axis_name(axis)
5721+
items = Index(items).intersection(labels)
5722+
if len(items) == 0:
5723+
# Keep the dtype of labels when we are empty
5724+
items = items.astype(labels.dtype)
57215725
# error: Keywords must be strings
5722-
return self.reindex( # type: ignore[misc]
5723-
**{name: labels.intersection(items)}
5724-
)
5726+
return self.reindex(**{name: items}) # type: ignore[misc]
57255727
elif like:
57265728

57275729
def f(x) -> bool_t:

pandas/core/reshape/merge.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2421,7 +2421,8 @@ def _factorize_keys(
24212421

24222422
elif isinstance(lk, ExtensionArray) and lk.dtype == rk.dtype:
24232423
if (isinstance(lk.dtype, ArrowDtype) and is_string_dtype(lk.dtype)) or (
2424-
isinstance(lk.dtype, StringDtype) and lk.dtype.storage == "pyarrow"
2424+
isinstance(lk.dtype, StringDtype)
2425+
and lk.dtype.storage in ["pyarrow", "pyarrow_numpy"]
24252426
):
24262427
import pyarrow as pa
24272428
import pyarrow.compute as pc

pandas/tests/frame/methods/test_filter.py

+14
Original file line numberDiff line numberDiff line change
@@ -137,3 +137,17 @@ def test_filter_regex_non_string(self):
137137
result = df.filter(regex="STRING")
138138
expected = df[["STRING"]]
139139
tm.assert_frame_equal(result, expected)
140+
141+
def test_filter_keep_order(self):
142+
# GH#54980
143+
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
144+
result = df.filter(items=["B", "A"])
145+
expected = df[["B", "A"]]
146+
tm.assert_frame_equal(result, expected)
147+
148+
def test_filter_different_dtype(self):
149+
# GH#54980
150+
df = DataFrame({1: [1, 2, 3], 2: [4, 5, 6]})
151+
result = df.filter(items=["B", "A"])
152+
expected = df[[]]
153+
tm.assert_frame_equal(result, expected)

pandas/tests/reshape/merge/test_merge.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -2949,13 +2949,13 @@ def test_merge_ea_int_and_float_numpy():
29492949
tm.assert_frame_equal(result, expected.astype("float64"))
29502950

29512951

2952-
def test_merge_arrow_string_index():
2952+
def test_merge_arrow_string_index(any_string_dtype):
29532953
# GH#54894
29542954
pytest.importorskip("pyarrow")
2955-
left = DataFrame({"a": ["a", "b"]}, dtype="string[pyarrow]")
2956-
right = DataFrame({"b": 1}, index=Index(["a", "c"], dtype="string[pyarrow]"))
2955+
left = DataFrame({"a": ["a", "b"]}, dtype=any_string_dtype)
2956+
right = DataFrame({"b": 1}, index=Index(["a", "c"], dtype=any_string_dtype))
29572957
result = left.merge(right, left_on="a", right_index=True, how="left")
29582958
expected = DataFrame(
2959-
{"a": Series(["a", "b"], dtype="string[pyarrow]"), "b": [1, np.nan]}
2959+
{"a": Series(["a", "b"], dtype=any_string_dtype), "b": [1, np.nan]}
29602960
)
29612961
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)