Skip to content

Commit d01725c

Browse files
committed
ENH: support str translate for StringMethods
1 parent e686387 commit d01725c

File tree

5 files changed

+83
-1
lines changed

5 files changed

+83
-1
lines changed

doc/source/api.rst

+1
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,7 @@ strings and apply several methods to it. These can be accessed like
558558
Series.str.strip
559559
Series.str.swapcase
560560
Series.str.title
561+
Series.str.translate
561562
Series.str.upper
562563
Series.str.wrap
563564
Series.str.zfill

doc/source/text.rst

+1
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ Method Summary
271271
:meth:`~Series.str.capitalize`,Equivalent to ``str.capitalize``
272272
:meth:`~Series.str.swapcase`,Equivalent to ``str.swapcase``
273273
:meth:`~Series.str.normalize`,Return Unicode normal form. Equivalent to ``unicodedata.normalize``
274+
:meth:`~Series.str.translate`,Equivalent to ``str.translate``
274275
:meth:`~Series.str.isalnum`,Equivalent to ``str.isalnum``
275276
:meth:`~Series.str.isalpha`,Equivalent to ``str.isalpha``
276277
:meth:`~Series.str.isdigit`,Equivalent to ``str.isdigit``

doc/source/whatsnew/v0.16.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ Enhancements
4040
Timestamp('2014-08-01 16:30') + BusinessHour()
4141

4242
- ``DataFrame.diff`` now takes an ``axis`` parameter that determines the direction of differencing (:issue:`9727`)
43-
43+
- Added support for ``StringMethods.translate()`` (:issue:`10052`)
4444
- Allow clip, clip_lower, and clip_upper to accept array-like arguments as thresholds (:issue:`6966`). These methods now have an ``axis`` parameter which determines how the Series or DataFrame will be aligned with the threshold(s).
4545

4646
- ``DataFrame.mask()`` and ``Series.mask()`` now support same keywords as ``where`` (:issue:`8801`)

pandas/core/strings.py

+43
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,44 @@ def str_wrap(arr, width, **kwargs):
870870
return _na_map(lambda s: '\n'.join(tw.wrap(s)), arr)
871871

872872

873+
def str_translate(arr, table, deletechars=None):
    """
    Translate every character of each string using a mapping table.

    Behaves like the standard :meth:`str.translate`. The ``deletechars``
    argument is accepted on python 2 only; on python 3, deletions have to
    be expressed through ``table`` itself.

    Parameters
    ----------
    table : dict (python 3), str or None (python 2)
        On python 3, a mapping from Unicode ordinals to Unicode ordinals,
        strings, or None. Characters without an entry are kept as they are
        and characters mapped to None are removed. :meth:`str.maketrans`
        can be used to build such a table.
        On python 2, either a string of length 256 or None. With None no
        translation takes place and only the characters listed in
        deletechars are removed. :func:`string.maketrans` can be used to
        build such a table.
    deletechars : str, optional (python 2)
        Characters to remove from each string. Only valid on python 2.

    Returns
    -------
    translated : Series/Index of objects

    Raises
    ------
    ValueError
        If deletechars is given while running on python 3.
    """
    if deletechars is not None:
        # Imported lazily: compat is only needed to validate deletechars.
        from pandas import compat
        if compat.PY3:
            raise ValueError("deletechars is not a valid argument for "
                             "str.translate in python 3. You should simply "
                             "specify character deletions in the table argument")
        translator = lambda x: x.translate(table, deletechars)
    else:
        translator = lambda x: x.translate(table)
    # _na_map applies the callable element-wise over arr.
    return _na_map(translator, arr)
909+
910+
873911
def str_get(arr, i):
874912
"""
875913
Extract element from lists, tuples, or strings in each element in the
@@ -1262,6 +1300,11 @@ def get_dummies(self, sep='|'):
12621300
result = str_get_dummies(self.series, sep)
12631301
return self._wrap_result(result)
12641302

1303+
@copy(str_translate)
def translate(self, table, deletechars=None):
    # Delegate to the module-level str_translate on the wrapped data, then
    # hand the raw result to _wrap_result before returning it.
    return self._wrap_result(
        str_translate(self.series, table, deletechars))
1307+
12651308
count = _pat_wrapper(str_count, flags=True)
12661309
startswith = _pat_wrapper(str_startswith, na=True)
12671310
endswith = _pat_wrapper(str_endswith, na=True)

pandas/tests/test_strings.py

+37
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,12 @@ def test_empty_str_methods(self):
700700
tm.assert_series_equal(empty_str, empty.str.capitalize())
701701
tm.assert_series_equal(empty_str, empty.str.swapcase())
702702
tm.assert_series_equal(empty_str, empty.str.normalize('NFC'))
703+
if compat.PY3:
704+
table = str.maketrans('a', 'b')
705+
else:
706+
import string
707+
table = string.maketrans('a', 'b')
708+
tm.assert_series_equal(empty_str, empty.str.translate(table))
703709

704710
def test_empty_str_methods_to_frame(self):
705711
empty_str = empty = Series(dtype=str)
@@ -992,6 +998,37 @@ def test_pad_fillchar(self):
992998
with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"):
993999
result = values.str.pad(5, fillchar=5)
9941000

1001+
def test_translate(self):
    # The translation table does not depend on the container class, so it
    # is built once before the loop and reused by every check below.
    if compat.PY3:
        table = str.maketrans('abc', 'cde')
    else:
        import string
        table = string.maketrans('abc', 'cde')

    for klass in [Series, Index]:
        s = klass(['abcdefg', 'abcc', 'cdddfg', 'cdefggg'])

        # plain translation: a -> c, b -> d, c -> e
        translated = s.str.translate(table)
        tm.assert_array_equal(
            translated, klass(['cdedefg', 'cdee', 'edddfg', 'edefggg']))

        # use of deletechars is python 2 only
        if compat.PY3:
            with tm.assertRaisesRegexp(ValueError,
                                       "deletechars is not a valid argument"):
                s.str.translate(table, deletechars='fg')
        else:
            # translate and delete in one call
            translated = s.str.translate(table, deletechars='fg')
            tm.assert_array_equal(
                translated, klass(['cdede', 'cdee', 'eddd', 'ede']))

            # table=None: deletion only, no translation
            translated = s.str.translate(None, deletechars='fg')
            tm.assert_array_equal(
                translated, klass(['abcde', 'abcc', 'cddd', 'cde']))

    # Series with non-string values
    mixed = Series(['a', 'b', 'c', 1.2])
    translated = mixed.str.translate(table)
    tm.assert_array_equal(translated, Series(['c', 'd', 'e', np.nan]))
1031+
9951032
def test_center_ljust_rjust(self):
9961033
values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])
9971034

0 commit comments

Comments
 (0)