Skip to content

Commit 40be2fc

Browse files
committed
ENH: support str translate for StringMethods
1 parent 2e087c7 commit 40be2fc

File tree

5 files changed

+83
-0
lines changed

5 files changed

+83
-0
lines changed

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,7 @@ strings and apply several methods to it. These can be accessed like
556556
Series.str.strip
557557
Series.str.swapcase
558558
Series.str.title
559+
Series.str.translate
559560
Series.str.upper
560561
Series.str.wrap
561562
Series.str.zfill

doc/source/text.rst

+1
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ Method Summary
269269
:meth:`~Series.str.capitalize`,Equivalent to ``str.capitalize``
270270
:meth:`~Series.str.swapcase`,Equivalent to ``str.swapcase``
271271
:meth:`~Series.str.normalize`,Return Unicode normal form. Equivalent to ``unicodedata.normalize``
272+
:meth:`~Series.str.translate`,Equivalent to ``str.translate``
272273
:meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum``
273274
:meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha``
274275
:meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit``

doc/source/whatsnew/v0.16.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ Enhancements
3939

4040
- Added ``StringMethods.capitalize()`` and ``swapcase`` which behave the same as standard ``str`` (:issue:`9766`)
4141
- ``DataFrame.diff`` now takes an ``axis`` parameter that determines the direction of differencing (:issue:`9727`)
42+
- Added support for ``StringMethods.translate()`` (:issue:`10052`)
4243
- Added ``StringMethods`` (.str accessor) to ``Index`` (:issue:`9068`)
4344
- Added ``StringMethods.normalize()`` which behaves the same as standard :func:`unicodedata.normalize` (:issue:`10031`)
4445

pandas/core/strings.py

+43
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,44 @@ def str_wrap(arr, width, **kwargs):
861861
return _na_map(lambda s: '\n'.join(tw.wrap(s)), arr)
862862

863863

864+
def str_translate(arr, table, deletechars=None):
    """
    Translate every string in the array through a mapping table.

    Behaves like the standard :meth:`str.translate`. The optional
    ``deletechars`` argument is accepted only under python 2; under
    python 3, deletions have to be expressed through ``table`` itself.

    Parameters
    ----------
    table : dict (python 3), str or None (python 2)
        In python 3, a mapping of Unicode ordinals to Unicode ordinals,
        strings, or None. Characters without an entry are left untouched
        and characters mapped to None are deleted. :meth:`str.maketrans`
        is a helper function for building such tables.
        In python 2, either a string of length 256 or None. When None, no
        translation is applied and the operation only removes the
        characters listed in deletechars. :func:`string.maketrans` is a
        helper function for building such tables.
    deletechars : str, optional (python 2)
        A string of characters to delete. Only valid in python 2.

    Returns
    -------
    translated : Series/Index of objects

    Raises
    ------
    ValueError
        If ``deletechars`` is given while running under python 3.
    """
    if deletechars is not None:
        # Imported lazily, only on the path that needs the version check.
        from pandas import compat
        if compat.PY3:
            raise ValueError("deletechars is not a valid argument for "
                             "str.translate in python 3. You should simply "
                             "specify character deletions in the table "
                             "argument")

        def _translate(value):
            # python 2 two-argument form: translate and delete in one call
            return value.translate(table, deletechars)
    else:
        def _translate(value):
            return value.translate(table)

    return _na_map(_translate, arr)
900+
901+
864902
def str_get(arr, i):
865903
"""
866904
Extract element from lists, tuples, or strings in each element in the
@@ -1165,6 +1203,11 @@ def get_dummies(self, sep='|'):
11651203
result = str_get_dummies(self.series, sep)
11661204
return self._wrap_result(result)
11671205

1206+
@copy(str_translate)
def translate(self, table, deletechars=None):
    # Delegate to the module-level implementation, then pass the outcome
    # through _wrap_result before handing it back to the caller.
    translated = str_translate(self.series, table, deletechars)
    return self._wrap_result(translated)
1210+
11681211
count = _pat_wrapper(str_count, flags=True)
11691212
startswith = _pat_wrapper(str_startswith, na=True)
11701213
endswith = _pat_wrapper(str_endswith, na=True)

pandas/tests/test_strings.py

+37
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,12 @@ def test_empty_str_methods(self):
686686
tm.assert_series_equal(empty_str, empty.str.capitalize())
687687
tm.assert_series_equal(empty_str, empty.str.swapcase())
688688
tm.assert_series_equal(empty_str, empty.str.normalize('NFC'))
689+
if compat.PY3:
690+
table = str.maketrans('a', 'b')
691+
else:
692+
import string
693+
table = string.maketrans('a', 'b')
694+
tm.assert_series_equal(empty_str, empty.str.translate(table))
689695

690696
def test_ismethods(self):
691697
values = ['A', 'b', 'Xy', '4', '3A', '', 'TT', '55', '-', ' ']
@@ -966,6 +972,37 @@ def test_pad_fillchar(self):
966972
with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"):
967973
result = values.str.pad(5, fillchar=5)
968974

975+
def test_translate(self):
    # The table does not depend on the container class, so build it once
    # with the maketrans helper that matches the running interpreter.
    if compat.PY3:
        table = str.maketrans('abc', 'cde')
    else:
        import string
        table = string.maketrans('abc', 'cde')

    for klass in (Series, Index):
        values = klass(['abcdefg', 'abcc', 'cdddfg', 'cdefggg'])

        # plain translation through the table
        translated = values.str.translate(table)
        wanted = klass(['cdedefg', 'cdee', 'edddfg', 'edefggg'])
        tm.assert_array_equal(translated, wanted)

        # use of deletechars is python 2 only
        if compat.PY3:
            with tm.assertRaisesRegexp(ValueError, "deletechars is not a valid argument"):
                values.str.translate(table, deletechars='fg')
        else:
            translated = values.str.translate(table, deletechars='fg')
            wanted = klass(['cdede', 'cdee', 'eddd', 'ede'])
            tm.assert_array_equal(translated, wanted)

            # table=None: nothing is translated, characters are only removed
            translated = values.str.translate(None, deletechars='fg')
            wanted = klass(['abcde', 'abcc', 'cddd', 'cde'])
            tm.assert_array_equal(translated, wanted)

    # Series with non-string values
    mixed = Series(['a', 'b', 'c', 1.2])
    wanted = Series(['c', 'd', 'e', np.nan])
    translated = mixed.str.translate(table)
    tm.assert_array_equal(translated, wanted)
1005+
9691006
def test_center_ljust_rjust(self):
9701007
values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])
9711008

0 commit comments

Comments
 (0)