From 5c1af683d66ef3326d1bce1c3789dbd1ea945c47 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Fri, 9 Feb 2024 14:11:29 -0700 Subject: [PATCH 1/5] DOC: fix PR02 errors in docstring for pandas.Series.str.wrap --- ci/code_checks.sh | 1 - pandas/core/strings/accessor.py | 45 +++++++++++++++++---------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index a9280fc48af1a..d1708f1c1550e 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -80,7 +80,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.Series.dt.ceil\ pandas.Series.dt.month_name\ pandas.Series.dt.day_name\ - pandas.Series.str.wrap\ pandas.Series.cat.rename_categories\ pandas.Series.cat.reorder_categories\ pandas.Series.cat.add_categories\ diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index bd523969fba13..2d904dfe478d1 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -2214,32 +2214,33 @@ def wrap(self, width: int, **kwargs): r""" Wrap strings in Series/Index at specified line width. - This method has the same keyword parameters and defaults as - :class:`textwrap.TextWrapper`. - Parameters ---------- width : int Maximum line width. - expand_tabs : bool, optional - If True, tab characters will be expanded to spaces (default: True). - replace_whitespace : bool, optional - If True, each whitespace character (as defined by string.whitespace) - remaining after tab expansion will be replaced by a single space - (default: True). - drop_whitespace : bool, optional - If True, whitespace that, after wrapping, happens to end up at the - beginning or end of a line is dropped (default: True). - break_long_words : bool, optional - If True, then words longer than width will be broken in order to ensure - that no lines are longer than width. 
If it is false, long words will - not be broken, and some lines may be longer than width (default: True). - break_on_hyphens : bool, optional - If True, wrapping will occur preferably on whitespace and right after - hyphens in compound words, as it is customary in English. If false, - only whitespaces will be considered as potentially good places for line - breaks, but you need to set break_long_words to false if you want truly - insecable words (default: True). + **kwargs + This method has the same keyword parameters and defaults as + :class:`textwrap.TextWrapper`. + + expand_tabs : bool, optional + If True, tab characters will be expanded to spaces (default: True). + replace_whitespace : bool, optional + If True, each whitespace character (as defined by string.whitespace) + remaining after tab expansion will be replaced by a single space + (default: True). + drop_whitespace : bool, optional + If True, whitespace that, after wrapping, happens to end up at the + beginning or end of a line is dropped (default: True). + break_long_words : bool, optional + If True, then words longer than width will be broken in order to ensure + that no lines are longer than width. If it is false, long words will + not be broken, and some lines may be longer than width (default: True). + break_on_hyphens : bool, optional + If True, wrapping will occur preferably on whitespace and right after + hyphens in compound words, as it is customary in English. If false, + only whitespaces will be considered as potentially good places for line + breaks, but you need to set break_long_words to false if you want truly + insecable words (default: True). 
Returns ------- From 650e6917777dd82ac984d1e4c2926c3e46cf5e9d Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Fri, 9 Feb 2024 15:31:19 -0700 Subject: [PATCH 2/5] change signature of wrap to include kwargs as explicit parameters --- pandas/core/strings/accessor.py | 64 ++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 2d904dfe478d1..362df6e14cbc6 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -2210,37 +2210,44 @@ def removesuffix(self, suffix: str): return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) - def wrap(self, width: int, **kwargs): + def wrap( + self, + width: int, + expand_tabs: bool = True, + replace_whitespace: bool = True, + drop_whitespace: bool = True, + break_long_words: bool = True, + break_on_hyphens: bool = True, + ): r""" Wrap strings in Series/Index at specified line width. + This method has the same keyword parameters and defaults as + :class:`textwrap.TextWrapper`. + Parameters ---------- width : int Maximum line width. - **kwargs - This method has the same keyword parameters and defaults as - :class:`textwrap.TextWrapper`. - - expand_tabs : bool, optional - If True, tab characters will be expanded to spaces (default: True). - replace_whitespace : bool, optional - If True, each whitespace character (as defined by string.whitespace) - remaining after tab expansion will be replaced by a single space - (default: True). - drop_whitespace : bool, optional - If True, whitespace that, after wrapping, happens to end up at the - beginning or end of a line is dropped (default: True). - break_long_words : bool, optional - If True, then words longer than width will be broken in order to ensure - that no lines are longer than width. 
If it is false, long words will - not be broken, and some lines may be longer than width (default: True). - break_on_hyphens : bool, optional - If True, wrapping will occur preferably on whitespace and right after - hyphens in compound words, as it is customary in English. If false, - only whitespaces will be considered as potentially good places for line - breaks, but you need to set break_long_words to false if you want truly - insecable words (default: True). + expand_tabs : bool, optional + If True, tab characters will be expanded to spaces (default: True). + replace_whitespace : bool, optional + If True, each whitespace character (as defined by string.whitespace) + remaining after tab expansion will be replaced by a single space + (default: True). + drop_whitespace : bool, optional + If True, whitespace that, after wrapping, happens to end up at the + beginning or end of a line is dropped (default: True). + break_long_words : bool, optional + If True, then words longer than width will be broken in order to ensure + that no lines are longer than width. If it is false, long words will + not be broken, and some lines may be longer than width (default: True). + break_on_hyphens : bool, optional + If True, wrapping will occur preferably on whitespace and right after + hyphens in compound words, as it is customary in English. If false, + only whitespaces will be considered as potentially good places for line + breaks, but you need to set break_long_words to false if you want truly + insecable words (default: True). 
Returns ------- @@ -2266,7 +2273,14 @@ def wrap(self, width: int, **kwargs): 1 another line\nto be\nwrapped dtype: object """ - result = self._data.array._str_wrap(width, **kwargs) + result = self._data.array._str_wrap( + width, + expand_tabs, + replace_whitespace, + drop_whitespace, + break_long_words, + break_on_hyphens, + ) return self._wrap_result(result) @forbid_nonstring_types(["bytes"]) From ac2c23c49e1602432d4c82b4a1bb89ff4ca23fa5 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Fri, 9 Feb 2024 16:16:49 -0700 Subject: [PATCH 3/5] fixing call to _str_wrap() --- pandas/core/strings/accessor.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 362df6e14cbc6..be3c2e03d6288 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -2274,12 +2274,12 @@ def wrap( dtype: object """ result = self._data.array._str_wrap( - width, - expand_tabs, - replace_whitespace, - drop_whitespace, - break_long_words, - break_on_hyphens, + width=width, + expand_tabs=expand_tabs, + replace_whitespace=replace_whitespace, + drop_whitespace=drop_whitespace, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, ) return self._wrap_result(result) From e15af1e85a598ac54aabc8f61e1f7b81bde4a5d4 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Fri, 9 Feb 2024 18:17:18 -0700 Subject: [PATCH 4/5] Added all textwrap.TextWrapper parameters --- pandas/core/strings/accessor.py | 50 +++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index be3c2e03d6288..cb7dd4ba477a1 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -2212,12 +2212,18 @@ def removesuffix(self, suffix: str): 
@forbid_nonstring_types(["bytes"]) def wrap( self, - width: int, + width: int = 70, expand_tabs: bool = True, + tabsize: int = 8, replace_whitespace: bool = True, drop_whitespace: bool = True, + initial_indent: str = "", + subsequent_indent: str = "", + fix_sentence_endings: bool = False, break_long_words: bool = True, break_on_hyphens: bool = True, + max_lines: int | None = None, + placeholder: str = " [...]", ): r""" Wrap strings in Series/Index at specified line width. @@ -2227,10 +2233,14 @@ def wrap( Parameters ---------- - width : int - Maximum line width. + width : int, optional + Maximum line width (default: 70). expand_tabs : bool, optional If True, tab characters will be expanded to spaces (default: True). + tabsize : int, optional + If expand_tabs is true, then all tab characters in text will be + expanded to zero or more spaces, depending on the current column + and the given tab size (default: 8). replace_whitespace : bool, optional If True, each whitespace character (as defined by string.whitespace) remaining after tab expansion will be replaced by a single space @@ -2238,6 +2248,28 @@ def wrap( drop_whitespace : bool, optional If True, whitespace that, after wrapping, happens to end up at the beginning or end of a line is dropped (default: True). + initial_indent : str, optional + String that will be prepended to the first line of wrapped output. + Counts towards the length of the first line. The empty string is + not indented (default: ''). + subsequent_indent : str, optional + String that will be prepended to all lines of wrapped output except + the first. Counts towards the length of each line except the first + (default: ''). + fix_sentence_endings : bool, optional + If true, TextWrapper attempts to detect sentence endings and ensure + that sentences are always separated by exactly two spaces. This is + generally desired for text in a monospaced font. 
However, the sentence + detection algorithm is imperfect: it assumes that a sentence ending + consists of a lowercase letter followed by one of '.', '!', or '?', + possibly followed by one of '"' or "'", followed by a space. One + problem with this algorithm is that it is unable to detect the + difference between “Dr.” in `[...] Dr. Frankenstein's monster [...]` + and “Spot.” in `[...] See Spot. See Spot run [...]` + Since the sentence detection algorithm relies on string.lowercase + for the definition of “lowercase letter”, and a convention of using + two spaces after a period to separate sentences on the same line, + it is specific to English-language texts (default: False). break_long_words : bool, optional If True, then words longer than width will be broken in order to ensure that no lines are longer than width. If it is false, long words will @@ -2248,6 +2280,12 @@ def wrap( only whitespaces will be considered as potentially good places for line breaks, but you need to set break_long_words to false if you want truly insecable words (default: True). + max_lines : int, optional + If not None, then the output will contain at most max_lines lines, with + placeholder appearing at the end of the output (default: None). + placeholder : str, optional + String that will appear at the end of the output text if it has been + truncated (default: ' [...]'). 
Returns ------- @@ -2276,10 +2314,16 @@ def wrap( result = self._data.array._str_wrap( width=width, expand_tabs=expand_tabs, + tabsize=tabsize, replace_whitespace=replace_whitespace, drop_whitespace=drop_whitespace, + initial_indent=initial_indent, + subsequent_indent=subsequent_indent, + fix_sentence_endings=fix_sentence_endings, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens, + max_lines=max_lines, + placeholder=placeholder, ) return self._wrap_result(result) From fe8d088e929adf462cb2aeaa3e764353f438f354 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Mon, 12 Feb 2024 19:56:50 -0700 Subject: [PATCH 5/5] update width argument to remain positional by default --- pandas/core/strings/accessor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index cb7dd4ba477a1..601150ba7a85a 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -2212,7 +2212,7 @@ def removesuffix(self, suffix: str): @forbid_nonstring_types(["bytes"]) def wrap( self, - width: int = 70, + width: int, expand_tabs: bool = True, tabsize: int = 8, replace_whitespace: bool = True, @@ -2234,7 +2234,7 @@ def wrap( Parameters ---------- - width : int, optional - Maximum line width (default: 70). + width : int + Maximum line width. expand_tabs : bool, optional If True, tab characters will be expanded to spaces (default: True). tabsize : int, optional