From 9126c8249375855db127581796eee671eec6dabd Mon Sep 17 00:00:00 2001 From: Manan Pal Singh Date: Mon, 12 Mar 2018 17:45:14 +0530 Subject: [PATCH 1/4] updated doc for pandas.Series.str.split() method --- pandas/core/strings.py | 77 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 8 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index fac607f4621a8..d6406d4c3e4c2 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1095,24 +1095,85 @@ def str_pad(arr, width, side='left', fillchar=' '): def str_split(arr, pat=None, n=None): """ - Split each string (a la re.split) in the Series/Index by given - pattern, propagating NA values. Equivalent to :meth:`str.split`. + Split strings around given separator/delimiter. + + Split each string in the caller's values by given + pattern, propagating NaN values. Equivalent to :meth:`str.split`. Parameters ---------- - pat : string, default None - String or regular expression to split on. If None, splits on whitespace + pat : str, optional + String or regular expression to split on. + If `None`, split on whitespace. n : int, default -1 (all) - None, 0 and -1 will be interpreted as return all splits + Limit number of splits in output. + `None`, 0 and -1 will be interpreted as return all splits. expand : bool, default False - * If True, return DataFrame/MultiIndex expanding dimensionality. - * If False, return Series/Index. + Expand the split strings into separate columns. - return_type : deprecated, use `expand` + * If `True`, return DataFrame/MultiIndex expanding dimensionality. + * If `False`, return Series/Index, containing lists of strings. 
Returns ------- + Type matches caller unless `expand=True` (return type is `DataFrame` or + `MultiIndex`) split : Series/Index or DataFrame/MultiIndex of objects + + Notes + ----- + - If n >= default splits, makes all splits + - If n < default splits, makes first n splits only + - Appends `None` for padding if `expand=True` + + Examples + -------- + >>> s = pd.Series(["this is good text", "but this is even better"]) + + By default, split will return an object of the same size + having lists containing the split elements + + >>> s.str.split() + 0 [this, is, good, text] + 1 [but, this, is, even, better] + dtype: object + >>> s.str.split("random") + 0 [this is good text] + 1 [but this is even better] + dtype: object + + When using `expand=True`, the split elements will + expand out into separate columns. + + >>> s.str.split(expand=True) + 0 1 2 3 4 + 0 this is good text None + 1 but this is even better + >>> s.str.split(" is ", expand=True) + 0 1 + 0 this good text + 1 but this even better + + Parameter `n` can be used to limit the number of splits in the output. + + >>> s.str.split("is", n=1) + 0 [th, is good text] + 1 [but th, is even better] + dtype: object + >>> s.str.split("is", n=1, expand=True) + 0 1 + 0 th is good text + 1 but th is even better + + If NaN is present, it is propagated throughout the columns + during the split. 
+ + >>> s = pd.Series(["this is good text", "but this is even better", np.nan]) + >>> s.str.split(n=3, expand=True) + 0 1 2 3 + 0 this is good text + 1 but this is even better + 2 NaN NaN NaN NaN """ if pat is None: if n is None or n == 0: From 2e13424fb477e8955c4e91d7b0396209e88df43b Mon Sep 17 00:00:00 2001 From: Manan Pal Singh Date: Mon, 12 Mar 2018 18:41:47 +0530 Subject: [PATCH 2/4] updated doc for pandas.Series.str.split() method --- pandas/core/strings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index d6406d4c3e4c2..f8ee9cf34ca99 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1116,9 +1116,9 @@ def str_split(arr, pat=None, n=None): Returns ------- - Type matches caller unless `expand=True` (return type is `DataFrame` or - `MultiIndex`) split : Series/Index or DataFrame/MultiIndex of objects + Type matches caller unless `expand=True` (return type is `DataFrame` or + `MultiIndex`) Notes ----- From 0a1da963e910a39f56ba49e4ccf4b1599d462841 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 12 Mar 2018 15:46:03 +0100 Subject: [PATCH 3/4] update backticks --- pandas/core/strings.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index f8ee9cf34ca99..0914badca8454 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1104,27 +1104,27 @@ def str_split(arr, pat=None, n=None): ---------- pat : str, optional String or regular expression to split on. - If `None`, split on whitespace. + If not specified, split on whitespace. n : int, default -1 (all) Limit number of splits in output. - `None`, 0 and -1 will be interpreted as return all splits. + ``None``, 0 and -1 will be interpreted as return all splits. expand : bool, default False Expand the split strings into separate columns. - * If `True`, return DataFrame/MultiIndex expanding dimensionality. 
- * If `False`, return Series/Index, containing lists of strings. + * If ``True``, return DataFrame/MultiIndex expanding dimensionality. + * If ``False``, return Series/Index, containing lists of strings. Returns ------- split : Series/Index or DataFrame/MultiIndex of objects - Type matches caller unless `expand=True` (return type is `DataFrame` or - `MultiIndex`) + Type matches caller unless ``expand=True`` (return type is DataFrame or + MultiIndex) Notes ----- - If n >= default splits, makes all splits - If n < default splits, makes first n splits only - - Appends `None` for padding if `expand=True` + - Appends `None` for padding if ``expand=True`` Examples -------- @@ -1142,7 +1142,7 @@ def str_split(arr, pat=None, n=None): 1 [but this is even better] dtype: object - When using `expand=True`, the split elements will + When using ``expand=True``, the split elements will expand out into separate columns. >>> s.str.split(expand=True) From da27e5f4a83298834667569822a307528c19ba8d Mon Sep 17 00:00:00 2001 From: Manan Pal Singh Date: Mon, 12 Mar 2018 20:44:22 +0530 Subject: [PATCH 4/4] updated docstring for pandas.Series.str.split() method --- pandas/core/strings.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 0914badca8454..11081535cf63f 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1122,9 +1122,12 @@ def str_split(arr, pat=None, n=None): Notes ----- - - If n >= default splits, makes all splits - - If n < default splits, makes first n splits only - - Appends `None` for padding if ``expand=True`` + The handling of the `n` keyword depends on the number of found splits: + + - If found splits > `n`, make first `n` splits only + - If found splits <= `n`, make all splits + - If for a certain row the number of found splits < `n`, + append `None` for padding up to `n` if ``expand=True`` Examples --------