Skip to content

Commit d0e9034

Browse files
committed
ENH: add str.decode to Series for unicode decoding close #1706
1 parent 603e5ae commit d0e9034

File tree

3 files changed

+33
-0
lines changed

3 files changed

+33
-0
lines changed

RELEASE.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ pandas 0.8.2
2727

2828
**Release date:** NOT YET RELEASED
2929

30+
**New features**
31+
32+
- Add ``str.decode`` to Series (#1706)
33+
3034
**Improvements to existing features**
3135

3236
- Add ``flags`` option for ``re.compile`` in some Series.str methods (#1659)

pandas/core/strings.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -591,6 +591,21 @@ def str_get(arr, i):
591591
f = lambda x: x[i]
592592
return _na_map(f, arr)
593593

594+
def str_decode(arr, encoding, errors='strict'):
    """
    Decode character string to unicode using indicated encoding

    Parameters
    ----------
    encoding : string
        Codec name handed to each element's ``decode`` method
    errors : string, default 'strict'
        Error handling scheme handed to each element's ``decode`` method
        (for example 'strict', 'ignore' or 'replace')

    Returns
    -------
    decoded : array
    """
    # Pass both arguments positionally: older Python 2 releases do not
    # accept keyword arguments for decode().
    f = lambda x: x.decode(encoding, errors)
    return _na_map(f, arr)
608+
594609
def _noarg_wrapper(f):
595610
def wrapper(self):
596611
result = f(self.series)
@@ -708,6 +723,11 @@ def slice(self, start=None, stop=None, step=1):
708723
def slice_replace(self, i=None, j=None):
    """Placeholder that is not implemented yet; always raises."""
    raise NotImplementedError
710725

726+
@copy(str_decode)
def decode(self, encoding):
    # Run str_decode over the wrapped series, then pass the raw output
    # through _wrap_result before handing it back to the caller.
    decoded = str_decode(self.series, encoding)
    return self._wrap_result(decoded)
730+
711731
count = _pat_wrapper(str_count, flags=True)
712732
startswith = _pat_wrapper(str_startswith)
713733
endswith = _pat_wrapper(str_endswith)

pandas/tests/test_strings.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,15 @@ def test_match_findall_flags(self):
658658
result = data.str.contains(pat, flags=re.IGNORECASE)
659659
self.assertEquals(result[0], True)
660660

661+
def test_decode(self):
    # The accessor's decode must agree with mapping the built-in decode
    # over the same values one element at a time.
    raw = Series(['a', 'b', '\xc3\xa4'])

    observed = raw.str.decode('utf-8')
    expected = raw.map(lambda value: value.decode('utf-8'))

    tm.assert_series_equal(observed, expected)
669+
661670
if __name__ == '__main__':
    # Hand this module to nose when the file is executed as a script.
    nose_args = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_args, exit=False)

0 commit comments

Comments
 (0)