Skip to content

Commit 5b838ab

Browse files
jeffreystarr authored and jreback committed
ENH: Implementing str_wrap function
1 parent d2ead2c commit 5b838ab

File tree

5 files changed

+80
-3
lines changed

5 files changed

+80
-3
lines changed

doc/source/basics.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1224,6 +1224,7 @@ Methods like ``match``, ``contains``, ``startswith``, and ``endswith`` take
12241224
``repeat``,Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``)
12251225
``pad``,"Add whitespace to left, right, or both sides of strings"
12261226
``center``,Equivalent to ``pad(side='both')``
1227+
``wrap``,Split long strings into lines no longer than a given width
12271228
``slice``,Slice each string in the Series
12281229
``slice_replace``,Replace slice in each string with passed value
12291230
``count``,Count occurrences of pattern

doc/source/release.rst

+1
Original file line number | Diff line number | Diff line change
@@ -299,6 +299,7 @@ Improvements to existing features
299299
- Refactor Block classes removing `Block.items` attributes to avoid duplication
300300
in item handling (:issue:`6745`, :issue:`6988`).
301301
- Improve performance in certain reindexing operations by optimizing ``take_2d`` (:issue:`6749`)
302+
- Arrays of strings can be wrapped to a specified width (``str.wrap``) (:issue:`6999`)
302303

303304
.. _release.bug_fixes-0.14.0:
304305

doc/source/v0.14.0.txt

+2
Original file line number | Diff line number | Diff line change
@@ -537,6 +537,8 @@ Enhancements
537537
columns=Grouper(freq='M', key='PayDay'),
538538
values='Quantity', aggfunc=np.sum)
539539

540+
- ``str.wrap`` implemented to wrap long strings to a specified width (:issue:`6999`)
541+
540542
Performance
541543
~~~~~~~~~~~
542544

pandas/core/strings.py

+51-2
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import re
99
import pandas.lib as lib
1010
import warnings
11+
import textwrap
1112

1213

1314
def _get_array_list(arr, others):
@@ -717,20 +718,63 @@ def str_rstrip(arr, to_strip=None):
717718
return _na_map(lambda x: x.rstrip(to_strip), arr)
718719

719720

720-
def str_wrap(arr, width=80):
721+
def str_wrap(arr, width, **kwargs):
721722
"""
722723
Wrap long strings to be formatted in paragraphs
723724
724725
Parameters
725726
----------
727+
Same keyword parameters and defaults as :class:`textwrap.TextWrapper`
726728
width : int
727729
Maximum line-width
730+
expand_tabs : bool, optional
731+
If true, tab characters will be expanded to spaces (default: True)
732+
replace_whitespace : bool, optional
733+
If true, each whitespace character (as defined by string.whitespace) remaining
734+
after tab expansion will be replaced by a single space (default: True)
735+
drop_whitespace : bool, optional
736+
If true, whitespace that, after wrapping, happens to end up at the beginning
737+
or end of a line is dropped (default: True)
738+
break_long_words : bool, optional
739+
If true, then words longer than width will be broken in order to ensure that
740+
no lines are longer than width. If it is false, long words will not be broken,
741+
and some lines may be longer than width. (default: True)
742+
break_on_hyphens : bool, optional
743+
If true, wrapping will occur preferably on whitespace and right after hyphens
744+
in compound words, as it is customary in English. If false, only whitespaces
745+
will be considered as potentially good places for line breaks, but you need
746+
to set break_long_words to false if you want truly unbreakable words.
747+
(default: True)
728748
729749
Returns
730750
-------
731751
wrapped : array
752+
753+
Notes
754+
-----
755+
Internally, this method uses a :class:`textwrap.TextWrapper` instance with default
756+
settings. To achieve behavior matching R's stringr library str_wrap function, use
757+
the arguments:
758+
759+
expand_tabs = False
760+
replace_whitespace = True
761+
drop_whitespace = True
762+
break_long_words = False
763+
break_on_hyphens = False
764+
765+
Examples
766+
--------
767+
768+
>>> s = pd.Series(['line to be wrapped', 'another line to be wrapped'])
769+
>>> s.str.wrap(12)
770+
0 line to be\nwrapped
771+
1 another line\nto be\nwrapped
732772
"""
733-
raise NotImplementedError
773+
kwargs['width'] = width
774+
775+
tw = textwrap.TextWrapper(**kwargs)
776+
777+
return _na_map(lambda s: '\n'.join(tw.wrap(s)), arr)
734778

735779

736780
def str_get(arr, i):
@@ -955,6 +999,11 @@ def rstrip(self, to_strip=None):
955999
result = str_rstrip(self.series, to_strip)
9561000
return self._wrap_result(result)
9571001

1002+
@copy(str_wrap)
1003+
def wrap(self, width, **kwargs):
1004+
result = str_wrap(self.series, width, **kwargs)
1005+
return self._wrap_result(result)
1006+
9581007
@copy(str_get_dummies)
9591008
def get_dummies(self, sep='|'):
9601009
result = str_get_dummies(self.series, sep)

pandas/tests/test_strings.py

+25-1
Original file line number | Diff line number | Diff line change
@@ -947,7 +947,31 @@ def test_strip_lstrip_rstrip_args_unicode(self):
947947
assert_series_equal(rs, xp)
948948

949949
def test_wrap(self):
950-
pass
950+
# test values are: two words less than width, two words equal to width,
951+
# two words greater than width, one word less than width, one word
952+
# equal to width, one word greater than width, multiple tokens with trailing
953+
# whitespace equal to width, multiple tokens greater than width, a lone tab
954+
values = Series([u('hello world'), u('hello world!'),
955+
u('hello world!!'), u('abcdefabcde'),
956+
u('abcdefabcdef'), u('abcdefabcdefa'),
957+
u('ab ab ab ab '), u('ab ab ab ab a'),
958+
u('\t')])
959+
960+
# expected values
961+
xp = Series([u('hello world'), u('hello world!'),
962+
u('hello\nworld!!'), u('abcdefabcde'),
963+
u('abcdefabcdef'), u('abcdefabcdef\na'),
964+
u('ab ab ab ab'), u('ab ab ab ab\na'),
965+
u('')])
966+
967+
rs = values.str.wrap(12, break_long_words=True)
968+
assert_series_equal(rs, xp)
969+
970+
# test with pre and post whitespace (non-unicode), NaN, and non-ascii Unicode
971+
values = Series([' pre ', np.nan, u('\xac\u20ac\U00008000 abadcafe')])
972+
xp = Series([' pre', NA, u('\xac\u20ac\U00008000 ab\nadcafe')])
973+
rs = values.str.wrap(6)
974+
assert_series_equal(rs, xp)
951975

952976
def test_get(self):
953977
values = Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'])

0 commit comments

Comments
 (0)