Skip to content

ENH: StringMethods now supports ljust and rjust #9352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 29, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -535,12 +535,14 @@ strings and apply several methods to it. These can be accessed like
Series.str.get
Series.str.join
Series.str.len
Series.str.ljust
Series.str.lower
Series.str.lstrip
Series.str.match
Series.str.pad
Series.str.repeat
Series.str.replace
Series.str.rjust
Series.str.rstrip
Series.str.slice
Series.str.slice_replace
Expand Down
4 changes: 3 additions & 1 deletion doc/source/text.rst
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,9 @@ Method Summary
:meth:`~Series.str.replace`,Replace occurrences of pattern/regex with some other string
:meth:`~Series.str.repeat`,Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``)
:meth:`~Series.str.pad`,"Add whitespace to left, right, or both sides of strings"
:meth:`~Series.str.center`,Equivalent to ``pad(side='both')``
:meth:`~Series.str.center`,Equivalent to ``str.center``
:meth:`~Series.str.ljust`,Equivalent to ``str.ljust``
:meth:`~Series.str.rjust`,Equivalent to ``str.rjust``
:meth:`~Series.str.wrap`,Split long strings into lines with length less than a given width
:meth:`~Series.str.slice`,Slice each string in the Series
:meth:`~Series.str.slice_replace`,Replace slice in each string with passed value
Expand Down
5 changes: 5 additions & 0 deletions doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ Enhancements

- Added auto-complete for ``Series.str.<tab>``, ``Series.dt.<tab>`` and ``Series.cat.<tab>`` (:issue:`9322`)



- Added ``StringMethods.ljust()`` and ``rjust()`` which behave the same as the standard ``str`` methods (:issue:`9352`)
- ``StringMethods.pad()`` and ``center()`` now accept a ``fillchar`` option to specify the filling character (:issue:`9352`)

Performance
~~~~~~~~~~~

Expand Down
76 changes: 48 additions & 28 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,16 @@
from pandas.compat import zip
from pandas.core.common import isnull, _values_from_object
import pandas.compat as compat
from pandas.util.decorators import Appender
import re
import pandas.lib as lib
import warnings
import textwrap


_shared_docs = dict()


def _get_array_list(arr, others):
from pandas.core.series import Series

Expand Down Expand Up @@ -583,9 +587,9 @@ def str_findall(arr, pat, flags=0):
return _na_map(regex.findall, arr)


def str_pad(arr, width, side='left'):
def str_pad(arr, width, side='left', fillchar=' '):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add here an explanation of the new kwarg fillchar as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, added.

"""
Pad strings with whitespace
Pad strings with an additional character

Parameters
----------
Expand All @@ -594,40 +598,33 @@ def str_pad(arr, width, side='left'):
Minimum width of resulting string; additional characters will be filled
with ``fillchar``
side : {'left', 'right', 'both'}, default 'left'
fillchar : str
Additional character for filling, default is whitespace

Returns
-------
padded : array
"""

if not isinstance(fillchar, compat.string_types):
msg = 'fillchar must be a character, not {0}'
raise TypeError(msg.format(type(fillchar).__name__))

if len(fillchar) != 1:
raise TypeError('fillchar must be a character, not str')

if side == 'left':
f = lambda x: x.rjust(width)
f = lambda x: x.rjust(width, fillchar)
elif side == 'right':
f = lambda x: x.ljust(width)
f = lambda x: x.ljust(width, fillchar)
elif side == 'both':
f = lambda x: x.center(width)
f = lambda x: x.center(width, fillchar)
else: # pragma: no cover
raise ValueError('Invalid side')

return _na_map(f, arr)


def str_center(arr, width):
"""
"Center" strings, filling left and right side with additional whitespace

Parameters
----------
width : int
Minimum width of resulting string; additional characters will be filled
with spaces

Returns
-------
centered : array
"""
return str_pad(arr, width, side='both')


def str_split(arr, pat=None, n=None, return_type='series'):
"""
Split each string (a la re.split) in array by given pattern, propagating NA
Expand Down Expand Up @@ -1016,14 +1013,37 @@ def repeat(self, repeats):
return self._wrap_result(result)

@copy(str_pad)
def pad(self, width, side='left'):
result = str_pad(self.series, width, side=side)
def pad(self, width, side='left', fillchar=' '):
result = str_pad(self.series, width, side=side, fillchar=fillchar)
return self._wrap_result(result)

@copy(str_center)
def center(self, width):
result = str_center(self.series, width)
return self._wrap_result(result)
_shared_docs['str_pad'] = ("""
Pad strings, filling the %s side with an additional character

Parameters
----------
width : int
Minimum width of resulting string; additional characters will be filled
with ``fillchar``
fillchar : str
Additional character for filling, default is whitespace

Returns
-------
filled : array
""")

@Appender(_shared_docs['str_pad'] % 'left and right')
def center(self, width, fillchar=' '):
return self.pad(width, side='both', fillchar=fillchar)

@Appender(_shared_docs['str_pad'] % 'right')
def ljust(self, width, fillchar=' '):
return self.pad(width, side='right', fillchar=fillchar)

@Appender(_shared_docs['str_pad'] % 'left')
def rjust(self, width, fillchar=' '):
return self.pad(width, side='left', fillchar=fillchar)

@copy(str_slice)
def slice(self, start=None, stop=None, step=None):
Expand Down
95 changes: 94 additions & 1 deletion pandas/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,21 +770,62 @@ def test_pad(self):
u('eeeeee')])
tm.assert_almost_equal(result, exp)

def test_center(self):
def test_pad_fillchar(self):

values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])

result = values.str.pad(5, side='left', fillchar='X')
exp = Series(['XXXXa', 'XXXXb', NA, 'XXXXc', NA, 'eeeeee'])
tm.assert_almost_equal(result, exp)

result = values.str.pad(5, side='right', fillchar='X')
exp = Series(['aXXXX', 'bXXXX', NA, 'cXXXX', NA, 'eeeeee'])
tm.assert_almost_equal(result, exp)

result = values.str.pad(5, side='both', fillchar='X')
exp = Series(['XXaXX', 'XXbXX', NA, 'XXcXX', NA, 'eeeeee'])
tm.assert_almost_equal(result, exp)

with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not str"):
result = values.str.pad(5, fillchar='XY')

with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"):
result = values.str.pad(5, fillchar=5)

def test_center_ljust_rjust(self):
values = Series(['a', 'b', NA, 'c', NA, 'eeeeee'])

result = values.str.center(5)
exp = Series([' a ', ' b ', NA, ' c ', NA, 'eeeeee'])
tm.assert_almost_equal(result, exp)

result = values.str.ljust(5)
exp = Series(['a ', 'b ', NA, 'c ', NA, 'eeeeee'])
tm.assert_almost_equal(result, exp)

result = values.str.rjust(5)
exp = Series([' a', ' b', NA, ' c', NA, 'eeeeee'])
tm.assert_almost_equal(result, exp)

# mixed
mixed = Series(['a', NA, 'b', True, datetime.today(),
'c', 'eee', None, 1, 2.])

rs = Series(mixed).str.center(5)
xp = Series([' a ', NA, ' b ', NA, NA, ' c ', ' eee ', NA, NA,
NA])
tm.assert_isinstance(rs, Series)
tm.assert_almost_equal(rs, xp)

rs = Series(mixed).str.ljust(5)
xp = Series(['a ', NA, 'b ', NA, NA, 'c ', 'eee ', NA, NA,
NA])
tm.assert_isinstance(rs, Series)
tm.assert_almost_equal(rs, xp)

rs = Series(mixed).str.rjust(5)
xp = Series([' a', NA, ' b', NA, NA, ' c', ' eee', NA, NA,
NA])
tm.assert_isinstance(rs, Series)
tm.assert_almost_equal(rs, xp)

Expand All @@ -797,6 +838,58 @@ def test_center(self):
u('eeeeee')])
tm.assert_almost_equal(result, exp)

result = values.str.ljust(5)
exp = Series([u('a '), u('b '), NA, u('c '), NA,
u('eeeeee')])
tm.assert_almost_equal(result, exp)

result = values.str.rjust(5)
exp = Series([u(' a'), u(' b'), NA, u(' c'), NA,
u('eeeeee')])
tm.assert_almost_equal(result, exp)

def test_center_ljust_rjust_fillchar(self):
values = Series(['a', 'bb', 'cccc', 'ddddd', 'eeeeee'])

result = values.str.center(5, fillchar='X')
expected = Series(['XXaXX', 'XXbbX', 'Xcccc', 'ddddd', 'eeeeee'])
tm.assert_series_equal(result, expected)
expected = np.array([v.center(5, 'X') for v in values.values])
tm.assert_numpy_array_equal(result.values, expected)

result = values.str.ljust(5, fillchar='X')
expected = Series(['aXXXX', 'bbXXX', 'ccccX', 'ddddd', 'eeeeee'])
tm.assert_series_equal(result, expected)
expected = np.array([v.ljust(5, 'X') for v in values.values])
tm.assert_numpy_array_equal(result.values, expected)

result = values.str.rjust(5, fillchar='X')
expected = Series(['XXXXa', 'XXXbb', 'Xcccc', 'ddddd', 'eeeeee'])
tm.assert_series_equal(result, expected)
expected = np.array([v.rjust(5, 'X') for v in values.values])
tm.assert_numpy_array_equal(result.values, expected)

# If fillchar is not a character, normal str raises TypeError
# 'aaa'.ljust(5, 'XY')
# TypeError: must be char, not str
with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not str"):
result = values.str.center(5, fillchar='XY')

with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not str"):
result = values.str.ljust(5, fillchar='XY')

with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not str"):
result = values.str.rjust(5, fillchar='XY')

with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"):
result = values.str.center(5, fillchar=1)

with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"):
result = values.str.ljust(5, fillchar=1)

with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"):
result = values.str.rjust(5, fillchar=1)

def test_split(self):
values = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])

Expand Down