Skip to content

Commit 61608b9

Browse files
simonjayhawkins authored and JulianWgs committed
[ArrowStringArray] use pyarrow string trimming functions if available (pandas-dev#41219)
1 parent f7aaa00 commit 61608b9

File tree

2 files changed

+37
-10
lines changed

2 files changed

+37
-10
lines changed

pandas/core/arrays/string_arrow.py

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -852,3 +852,30 @@ def _str_lower(self):
852852

853853
def _str_upper(self):
    """
    Apply ``pc.utf8_upper`` to the backing data and wrap the result.

    Returns
    -------
    A new instance of ``type(self)`` built from the kernel output.
    """
    uppercased = pc.utf8_upper(self._data)
    return type(self)(uppercased)
855+
856+
def _str_strip(self, to_strip=None):
    """
    Trim both ends of the backing data, using a ``pc`` kernel if available.

    Parameters
    ----------
    to_strip : optional
        When None, ``pc.utf8_trim_whitespace`` is used; otherwise the value
        is forwarded to ``pc.utf8_trim`` as ``characters``.

    Returns
    -------
    A new instance of ``type(self)`` wrapping the kernel output, or whatever
    the parent class's ``_str_strip`` returns when the required kernel is
    not an attribute of ``pc``.
    """
    strip_whitespace = to_strip is None
    if strip_whitespace and hasattr(pc, "utf8_trim_whitespace"):
        trimmed = pc.utf8_trim_whitespace(self._data)
        return type(self)(trimmed)
    if not strip_whitespace and hasattr(pc, "utf8_trim"):
        trimmed = pc.utf8_trim(self._data, characters=to_strip)
        return type(self)(trimmed)
    # The needed kernel is missing from pc: defer to the parent class.
    return super()._str_strip(to_strip)
864+
865+
def _str_lstrip(self, to_strip=None):
    """
    Trim the left end of the backing data, using a ``pc`` kernel if available.

    Parameters
    ----------
    to_strip : optional
        When None, ``pc.utf8_ltrim_whitespace`` is used; otherwise the value
        is forwarded to ``pc.utf8_ltrim`` as ``characters``.

    Returns
    -------
    A new instance of ``type(self)`` wrapping the kernel output, or whatever
    the parent class's ``_str_lstrip`` returns when the required kernel is
    not an attribute of ``pc``.
    """
    strip_whitespace = to_strip is None
    if strip_whitespace and hasattr(pc, "utf8_ltrim_whitespace"):
        trimmed = pc.utf8_ltrim_whitespace(self._data)
        return type(self)(trimmed)
    if not strip_whitespace and hasattr(pc, "utf8_ltrim"):
        trimmed = pc.utf8_ltrim(self._data, characters=to_strip)
        return type(self)(trimmed)
    # The needed kernel is missing from pc: defer to the parent class.
    return super()._str_lstrip(to_strip)
873+
874+
def _str_rstrip(self, to_strip=None):
    """
    Trim the right end of the backing data, using a ``pc`` kernel if available.

    Parameters
    ----------
    to_strip : optional
        When None, ``pc.utf8_rtrim_whitespace`` is used; otherwise the value
        is forwarded to ``pc.utf8_rtrim`` as ``characters``.

    Returns
    -------
    A new instance of ``type(self)`` wrapping the kernel output, or whatever
    the parent class's ``_str_rstrip`` returns when the required kernel is
    not an attribute of ``pc``.
    """
    strip_whitespace = to_strip is None
    if strip_whitespace and hasattr(pc, "utf8_rtrim_whitespace"):
        trimmed = pc.utf8_rtrim_whitespace(self._data)
        return type(self)(trimmed)
    if not strip_whitespace and hasattr(pc, "utf8_rtrim"):
        trimmed = pc.utf8_rtrim(self._data, characters=to_strip)
        return type(self)(trimmed)
    # The needed kernel is missing from pc: defer to the parent class.
    return super()._str_rstrip(to_strip)

pandas/tests/strings/test_strings.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -570,19 +570,19 @@ def test_slice_replace():
570570
tm.assert_series_equal(result, exp)
571571

572572

573-
def test_strip_lstrip_rstrip():
574-
values = Series([" aa ", " bb \n", np.nan, "cc "])
573+
def test_strip_lstrip_rstrip(any_string_dtype):
574+
values = Series([" aa ", " bb \n", np.nan, "cc "], dtype=any_string_dtype)
575575

576576
result = values.str.strip()
577-
exp = Series(["aa", "bb", np.nan, "cc"])
577+
exp = Series(["aa", "bb", np.nan, "cc"], dtype=any_string_dtype)
578578
tm.assert_series_equal(result, exp)
579579

580580
result = values.str.lstrip()
581-
exp = Series(["aa ", "bb \n", np.nan, "cc "])
581+
exp = Series(["aa ", "bb \n", np.nan, "cc "], dtype=any_string_dtype)
582582
tm.assert_series_equal(result, exp)
583583

584584
result = values.str.rstrip()
585-
exp = Series([" aa", " bb", np.nan, "cc"])
585+
exp = Series([" aa", " bb", np.nan, "cc"], dtype=any_string_dtype)
586586
tm.assert_series_equal(result, exp)
587587

588588

@@ -609,19 +609,19 @@ def test_strip_lstrip_rstrip_mixed():
609609
tm.assert_almost_equal(rs, xp)
610610

611611

612-
def test_strip_lstrip_rstrip_args():
613-
values = Series(["xxABCxx", "xx BNSD", "LDFJH xx"])
612+
def test_strip_lstrip_rstrip_args(any_string_dtype):
613+
values = Series(["xxABCxx", "xx BNSD", "LDFJH xx"], dtype=any_string_dtype)
614614

615615
rs = values.str.strip("x")
616-
xp = Series(["ABC", " BNSD", "LDFJH "])
616+
xp = Series(["ABC", " BNSD", "LDFJH "], dtype=any_string_dtype)
617617
tm.assert_series_equal(rs, xp)
618618

619619
rs = values.str.lstrip("x")
620-
xp = Series(["ABCxx", " BNSD", "LDFJH xx"])
620+
xp = Series(["ABCxx", " BNSD", "LDFJH xx"], dtype=any_string_dtype)
621621
tm.assert_series_equal(rs, xp)
622622

623623
rs = values.str.rstrip("x")
624-
xp = Series(["xxABC", "xx BNSD", "LDFJH "])
624+
xp = Series(["xxABC", "xx BNSD", "LDFJH "], dtype=any_string_dtype)
625625
tm.assert_series_equal(rs, xp)
626626

627627

0 commit comments

Comments
 (0)