From 0b679b470c4c5ed9b76a6df6e2b6a88bb89950ac Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 25 Jan 2015 20:32:57 +0900 Subject: [PATCH] ENH: StringMethods now supports ljust and rjust --- doc/source/api.rst | 2 + doc/source/text.rst | 4 +- doc/source/whatsnew/v0.16.0.txt | 5 ++ pandas/core/strings.py | 76 ++++++++++++++++---------- pandas/tests/test_strings.py | 95 ++++++++++++++++++++++++++++++++- 5 files changed, 152 insertions(+), 30 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index a8097f2648c4b..f3843f26a9505 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -535,12 +535,14 @@ strings and apply several methods to it. These can be acccessed like Series.str.get Series.str.join Series.str.len + Series.str.ljust Series.str.lower Series.str.lstrip Series.str.match Series.str.pad Series.str.repeat Series.str.replace + Series.str.rjust Series.str.rstrip Series.str.slice Series.str.slice_replace diff --git a/doc/source/text.rst b/doc/source/text.rst index eb11cfb1248a9..3ad32c3c41073 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -212,7 +212,9 @@ Method Summary :meth:`~Series.str.replace`,Replace occurrences of pattern/regex with some other string :meth:`~Series.str.repeat`,Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``) :meth:`~Series.str.pad`,"Add whitespace to left, right, or both sides of strings" - :meth:`~Series.str.center`,Equivalent to ``pad(side='both')`` + :meth:`~Series.str.center`,Equivalent to ``str.center`` + :meth:`~Series.str.ljust`,Equivalent to ``str.ljust`` + :meth:`~Series.str.rjust`,Equivalent to ``str.rjust`` :meth:`~Series.str.wrap`,Split long strings into lines with length less than a given width :meth:`~Series.str.slice`,Slice each string in the Series :meth:`~Series.str.slice_replace`,Replace slice in each string with passed value diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 6082a58687c2c..25e436f8504d1 100644 --- 
a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -110,6 +110,11 @@ Enhancements - Added auto-complete for ``Series.str.``, ``Series.dt.`` and ``Series.cat.`` (:issue:`9322`) + + +- Added ``StringMethods.ljust()`` and ``rjust()`` which behave the same as the standard ``str`` methods (:issue:`9352`) +- ``StringMethods.pad()`` and ``center()`` now accept a ``fillchar`` option to specify the filling character (:issue:`9352`) + Performance ~~~~~~~~~~~ diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 75d10654977cd..8845944d615e0 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -3,12 +3,16 @@ from pandas.compat import zip from pandas.core.common import isnull, _values_from_object import pandas.compat as compat +from pandas.util.decorators import Appender import re import pandas.lib as lib import warnings import textwrap +_shared_docs = dict() + + def _get_array_list(arr, others): from pandas.core.series import Series @@ -583,9 +587,9 @@ def str_findall(arr, pat, flags=0): return _na_map(regex.findall, arr) -def str_pad(arr, width, side='left'): +def str_pad(arr, width, side='left', fillchar=' '): """ - Pad strings with whitespace + Pad strings with an additional character Parameters ---------- @@ -594,40 +598,33 @@ def str_pad(arr, width, side='left'): Minimum width of resulting string; additional characters will be filled with spaces side : {'left', 'right', 'both'}, default 'left' + fillchar : str + Additional character for filling, default is whitespace Returns ------- padded : array """ + + if not isinstance(fillchar, compat.string_types): + msg = 'fillchar must be a character, not {0}' + raise TypeError(msg.format(type(fillchar).__name__)) + + if len(fillchar) != 1: + raise TypeError('fillchar must be a character, not str') + if side == 'left': - f = lambda x: x.rjust(width) + f = lambda x: x.rjust(width, fillchar) elif side == 'right': - f = lambda x: x.ljust(width) + f = lambda x: x.ljust(width, fillchar) elif side == 
'both': - f = lambda x: x.center(width) + f = lambda x: x.center(width, fillchar) else: # pragma: no cover raise ValueError('Invalid side') return _na_map(f, arr) -def str_center(arr, width): - """ - "Center" strings, filling left and right side with additional whitespace - - Parameters - ---------- - width : int - Minimum width of resulting string; additional characters will be filled - with spaces - - Returns - ------- - centered : array - """ - return str_pad(arr, width, side='both') - - def str_split(arr, pat=None, n=None, return_type='series'): """ Split each string (a la re.split) in array by given pattern, propagating NA @@ -1016,14 +1013,37 @@ def repeat(self, repeats): return self._wrap_result(result) @copy(str_pad) - def pad(self, width, side='left'): - result = str_pad(self.series, width, side=side) + def pad(self, width, side='left', fillchar=' '): + result = str_pad(self.series, width, side=side, fillchar=fillchar) return self._wrap_result(result) - @copy(str_center) - def center(self, width): - result = str_center(self.series, width) - return self._wrap_result(result) + _shared_docs['str_pad'] = (""" + "Center" strings, filling %s side with an additional character + + Parameters + ---------- + width : int + Minimum width of resulting string; additional characters will be filled + with ``fillchar`` + fillchar : str + Additional character for filling, default is whitespace + + Returns + ------- + centered : array + """) + + @Appender(_shared_docs['str_pad'] % 'left and right') + def center(self, width, fillchar=' '): + return self.pad(width, side='both', fillchar=fillchar) + + @Appender(_shared_docs['str_pad'] % 'right') + def ljust(self, width, fillchar=' '): + return self.pad(width, side='right', fillchar=fillchar) + + @Appender(_shared_docs['str_pad'] % 'left') + def rjust(self, width, fillchar=' '): + return self.pad(width, side='left', fillchar=fillchar) @copy(str_slice) def slice(self, start=None, stop=None, step=None): diff --git 
a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index b8f1a6ac342af..8bdc9d4f25ac9 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -770,13 +770,43 @@ def test_pad(self): u('eeeeee')]) tm.assert_almost_equal(result, exp) - def test_center(self): + def test_pad_fillchar(self): + + values = Series(['a', 'b', NA, 'c', NA, 'eeeeee']) + + result = values.str.pad(5, side='left', fillchar='X') + exp = Series(['XXXXa', 'XXXXb', NA, 'XXXXc', NA, 'eeeeee']) + tm.assert_almost_equal(result, exp) + + result = values.str.pad(5, side='right', fillchar='X') + exp = Series(['aXXXX', 'bXXXX', NA, 'cXXXX', NA, 'eeeeee']) + tm.assert_almost_equal(result, exp) + + result = values.str.pad(5, side='both', fillchar='X') + exp = Series(['XXaXX', 'XXbXX', NA, 'XXcXX', NA, 'eeeeee']) + tm.assert_almost_equal(result, exp) + + with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not str"): + result = values.str.pad(5, fillchar='XY') + + with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"): + result = values.str.pad(5, fillchar=5) + + def test_center_ljust_rjust(self): values = Series(['a', 'b', NA, 'c', NA, 'eeeeee']) result = values.str.center(5) exp = Series([' a ', ' b ', NA, ' c ', NA, 'eeeeee']) tm.assert_almost_equal(result, exp) + result = values.str.ljust(5) + exp = Series(['a ', 'b ', NA, 'c ', NA, 'eeeeee']) + tm.assert_almost_equal(result, exp) + + result = values.str.rjust(5) + exp = Series([' a', ' b', NA, ' c', NA, 'eeeeee']) + tm.assert_almost_equal(result, exp) + # mixed mixed = Series(['a', NA, 'b', True, datetime.today(), 'c', 'eee', None, 1, 2.]) @@ -784,7 +814,18 @@ def test_center(self): rs = Series(mixed).str.center(5) xp = Series([' a ', NA, ' b ', NA, NA, ' c ', ' eee ', NA, NA, NA]) + tm.assert_isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + rs = Series(mixed).str.ljust(5) + xp = Series(['a ', NA, 'b ', NA, NA, 'c ', 'eee ', NA, NA, + NA]) + tm.assert_isinstance(rs, 
Series) + tm.assert_almost_equal(rs, xp) + + rs = Series(mixed).str.rjust(5) + xp = Series([' a', NA, ' b', NA, NA, ' c', ' eee', NA, NA, + NA]) tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) @@ -797,6 +838,58 @@ def test_center(self): u('eeeeee')]) tm.assert_almost_equal(result, exp) + result = values.str.ljust(5) + exp = Series([u('a '), u('b '), NA, u('c '), NA, + u('eeeeee')]) + tm.assert_almost_equal(result, exp) + + result = values.str.rjust(5) + exp = Series([u(' a'), u(' b'), NA, u(' c'), NA, + u('eeeeee')]) + tm.assert_almost_equal(result, exp) + + def test_center_ljust_rjust_fillchar(self): + values = Series(['a', 'bb', 'cccc', 'ddddd', 'eeeeee']) + + result = values.str.center(5, fillchar='X') + expected = Series(['XXaXX', 'XXbbX', 'Xcccc', 'ddddd', 'eeeeee']) + tm.assert_series_equal(result, expected) + expected = np.array([v.center(5, 'X') for v in values.values]) + tm.assert_numpy_array_equal(result.values, expected) + + result = values.str.ljust(5, fillchar='X') + expected = Series(['aXXXX', 'bbXXX', 'ccccX', 'ddddd', 'eeeeee']) + tm.assert_series_equal(result, expected) + expected = np.array([v.ljust(5, 'X') for v in values.values]) + tm.assert_numpy_array_equal(result.values, expected) + + result = values.str.rjust(5, fillchar='X') + expected = Series(['XXXXa', 'XXXbb', 'Xcccc', 'ddddd', 'eeeeee']) + tm.assert_series_equal(result, expected) + expected = np.array([v.rjust(5, 'X') for v in values.values]) + tm.assert_numpy_array_equal(result.values, expected) + + # If fillchar is not a character, normal str raises TypeError + # 'aaa'.ljust(5, 'XY') + # TypeError: must be char, not str + with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not str"): + result = values.str.center(5, fillchar='XY') + + with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not str"): + result = values.str.ljust(5, fillchar='XY') + + with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not str"): + result = 
values.str.rjust(5, fillchar='XY') + + with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"): + result = values.str.center(5, fillchar=1) + + with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"): + result = values.str.ljust(5, fillchar=1) + + with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"): + result = values.str.rjust(5, fillchar=1) + def test_split(self): values = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])