Skip to content

Commit 7bee353

Browse files
ryankarlos authored and jreback committed
DOC: updated the Series.str.rsplit and Series.str.split docstrings (#21026)
1 parent 5fdaa97 commit 7bee353

File tree

1 file changed

+123
-121
lines changed

1 file changed

+123
-121
lines changed

pandas/core/strings.py

+123-121
Original file line number | Diff line number | Diff line change
@@ -1343,108 +1343,7 @@ def str_pad(arr, width, side='left', fillchar=' '):
13431343

13441344

13451345
def str_split(arr, pat=None, n=None):
1346-
"""
1347-
Split strings around given separator/delimiter.
1348-
1349-
Split each string in the caller's values by given
1350-
pattern, propagating NaN values. Equivalent to :meth:`str.split`.
1351-
1352-
Parameters
1353-
----------
1354-
pat : str, optional
1355-
String or regular expression to split on.
1356-
If not specified, split on whitespace.
1357-
n : int, default -1 (all)
1358-
Limit number of splits in output.
1359-
``None``, 0 and -1 will be interpreted as return all splits.
1360-
expand : bool, default False
1361-
Expand the split strings into separate columns.
1362-
1363-
* If ``True``, return DataFrame/MultiIndex expanding dimensionality.
1364-
* If ``False``, return Series/Index, containing lists of strings.
13651346

1366-
Returns
1367-
-------
1368-
Series, Index, DataFrame or MultiIndex
1369-
Type matches caller unless ``expand=True`` (see Notes).
1370-
1371-
Notes
1372-
-----
1373-
The handling of the `n` keyword depends on the number of found splits:
1374-
1375-
- If found splits > `n`, make first `n` splits only
1376-
- If found splits <= `n`, make all splits
1377-
- If for a certain row the number of found splits < `n`,
1378-
append `None` for padding up to `n` if ``expand=True``
1379-
1380-
If using ``expand=True``, Series and Index callers return DataFrame and
1381-
MultiIndex objects, respectively.
1382-
1383-
See Also
1384-
--------
1385-
str.split : Standard library version of this method.
1386-
Series.str.get_dummies : Split each string into dummy variables.
1387-
Series.str.partition : Split string on a separator, returning
1388-
the before, separator, and after components.
1389-
1390-
Examples
1391-
--------
1392-
>>> s = pd.Series(["this is good text", "but this is even better"])
1393-
1394-
By default, split will return an object of the same size
1395-
having lists containing the split elements
1396-
1397-
>>> s.str.split()
1398-
0 [this, is, good, text]
1399-
1 [but, this, is, even, better]
1400-
dtype: object
1401-
>>> s.str.split("random")
1402-
0 [this is good text]
1403-
1 [but this is even better]
1404-
dtype: object
1405-
1406-
When using ``expand=True``, the split elements will expand out into
1407-
separate columns.
1408-
1409-
For Series object, output return type is DataFrame.
1410-
1411-
>>> s.str.split(expand=True)
1412-
0 1 2 3 4
1413-
0 this is good text None
1414-
1 but this is even better
1415-
>>> s.str.split(" is ", expand=True)
1416-
0 1
1417-
0 this good text
1418-
1 but this even better
1419-
1420-
For Index object, output return type is MultiIndex.
1421-
1422-
>>> i = pd.Index(["ba 100 001", "ba 101 002", "ba 102 003"])
1423-
>>> i.str.split(expand=True)
1424-
MultiIndex(levels=[['ba'], ['100', '101', '102'], ['001', '002', '003']],
1425-
labels=[[0, 0, 0], [0, 1, 2], [0, 1, 2]])
1426-
1427-
Parameter `n` can be used to limit the number of splits in the output.
1428-
1429-
>>> s.str.split("is", n=1)
1430-
0 [th, is good text]
1431-
1 [but th, is even better]
1432-
dtype: object
1433-
>>> s.str.split("is", n=1, expand=True)
1434-
0 1
1435-
0 th is good text
1436-
1 but th is even better
1437-
1438-
If NaN is present, it is propagated throughout the columns
1439-
during the split.
1440-
1441-
>>> s = pd.Series(["this is good text", "but this is even better", np.nan])
1442-
>>> s.str.split(n=3, expand=True)
1443-
0 1 2 3
1444-
0 this is good text
1445-
1 but this is even better
1446-
2 NaN NaN NaN NaN
1447-
"""
14481347
if pat is None:
14491348
if n is None or n == 0:
14501349
n = -1
@@ -1464,25 +1363,7 @@ def str_split(arr, pat=None, n=None):
14641363

14651364

14661365
def str_rsplit(arr, pat=None, n=None):
1467-
"""
1468-
Split each string in the Series/Index by the given delimiter
1469-
string, starting at the end of the string and working to the front.
1470-
Equivalent to :meth:`str.rsplit`.
14711366

1472-
Parameters
1473-
----------
1474-
pat : string, default None
1475-
Separator to split on. If None, splits on whitespace
1476-
n : int, default -1 (all)
1477-
None, 0 and -1 will be interpreted as return all splits
1478-
expand : bool, default False
1479-
* If True, return DataFrame/MultiIndex expanding dimensionality.
1480-
* If False, return Series/Index.
1481-
1482-
Returns
1483-
-------
1484-
split : Series/Index or DataFrame/MultiIndex of objects
1485-
"""
14861367
if n is None or n == 0:
14871368
n = -1
14881369
f = lambda x: x.rsplit(pat, n)
@@ -2325,12 +2206,133 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
23252206
res = Series(res, index=data.index, name=self._orig.name)
23262207
return res
23272208

2328-
@copy(str_split)
2209+
_shared_docs['str_split'] = ("""
2210+
Split strings around given separator/delimiter.
2211+
2212+
Splits the string in the Series/Index from the %(side)s,
2213+
at the specified delimiter string. Equivalent to :meth:`str.%(method)s`.
2214+
2215+
Parameters
2216+
----------
2217+
pat : str, optional
2218+
String or regular expression to split on.
2219+
If not specified, split on whitespace.
2220+
n : int, default -1 (all)
2221+
Limit number of splits in output.
2222+
``None``, 0 and -1 will be interpreted as return all splits.
2223+
expand : bool, default False
2224+
Expand the split strings into separate columns.
2225+
2226+
* If ``True``, return DataFrame/MultiIndex expanding dimensionality.
2227+
* If ``False``, return Series/Index, containing lists of strings.
2228+
2229+
Returns
2230+
-------
2231+
Series, Index, DataFrame or MultiIndex
2232+
Type matches caller unless ``expand=True`` (see Notes).
2233+
2234+
See Also
2235+
--------
2236+
Series.str.split : Split strings around given separator/delimiter.
2237+
Series.str.rsplit : Split strings around given separator/delimiter,
2238+
starting from the right.
2239+
Series.str.join : Join lists contained as elements in the Series/Index
2240+
with passed delimiter.
2241+
str.split : Standard library version for split.
2242+
str.rsplit : Standard library version for rsplit.
2243+
2244+
Notes
2245+
-----
2246+
The handling of the `n` keyword depends on the number of found splits:
2247+
2248+
- If found splits > `n`, make first `n` splits only
2249+
- If found splits <= `n`, make all splits
2250+
- If for a certain row the number of found splits < `n`,
2251+
append `None` for padding up to `n` if ``expand=True``
2252+
2253+
If using ``expand=True``, Series and Index callers return DataFrame and
2254+
MultiIndex objects, respectively.
2255+
2256+
Examples
2257+
--------
2258+
>>> s = pd.Series(["this is a regular sentence",
2259+
... "https://docs.python.org/3/tutorial/index.html", np.nan])
2260+
2261+
In the default setting, the string is split by whitespace.
2262+
2263+
>>> s.str.split()
2264+
0 [this, is, a, regular, sentence]
2265+
1 [https://docs.python.org/3/tutorial/index.html]
2266+
2 NaN
2267+
dtype: object
2268+
2269+
Without the `n` parameter, the outputs of `rsplit` and `split`
2270+
are identical.
2271+
2272+
>>> s.str.rsplit()
2273+
0 [this, is, a, regular, sentence]
2274+
1 [https://docs.python.org/3/tutorial/index.html]
2275+
2 NaN
2276+
dtype: object
2277+
2278+
The `n` parameter can be used to limit the number of splits on the
2279+
delimiter. The outputs of `split` and `rsplit` are different.
2280+
2281+
>>> s.str.split(n=2)
2282+
0 [this, is, a regular sentence]
2283+
1 [https://docs.python.org/3/tutorial/index.html]
2284+
2 NaN
2285+
dtype: object
2286+
2287+
>>> s.str.rsplit(n=2)
2288+
0 [this is a, regular, sentence]
2289+
1 [https://docs.python.org/3/tutorial/index.html]
2290+
2 NaN
2291+
dtype: object
2292+
2293+
The `pat` parameter can be used to split by other characters.
2294+
2295+
>>> s.str.split(pat="/")
2296+
0 [this is a regular sentence]
2297+
1 [https:, , docs.python.org, 3, tutorial, index...
2298+
2 NaN
2299+
dtype: object
2300+
2301+
When using ``expand=True``, the split elements will expand out into
2302+
separate columns. If NaN is present, it is propagated throughout
2303+
the columns during the split.
2304+
2305+
>>> s.str.split(expand=True)
2306+
0 1 2 3
2307+
0 this is a regular
2308+
1 https://docs.python.org/3/tutorial/index.html None None None
2309+
2 NaN NaN NaN NaN \
2310+
2311+
4
2312+
0 sentence
2313+
1 None
2314+
2 NaN
2315+
2316+
For slightly more complex use cases like splitting the html document name
2317+
from a url, a combination of parameter settings can be used.
2318+
2319+
>>> s.str.rsplit("/", n=1, expand=True)
2320+
0 1
2321+
0 this is a regular sentence None
2322+
1 https://docs.python.org/3/tutorial index.html
2323+
2 NaN NaN
2324+
""")
2325+
2326+
@Appender(_shared_docs['str_split'] % {
2327+
'side': 'beginning',
2328+
'method': 'split'})
23292329
def split(self, pat=None, n=-1, expand=False):
23302330
result = str_split(self._data, pat, n=n)
23312331
return self._wrap_result(result, expand=expand)
23322332

2333-
@copy(str_rsplit)
2333+
@Appender(_shared_docs['str_split'] % {
2334+
'side': 'end',
2335+
'method': 'rsplit'})
23342336
def rsplit(self, pat=None, n=-1, expand=False):
23352337
result = str_rsplit(self._data, pat, n=n)
23362338
return self._wrap_result(result, expand=expand)

0 commit comments

Comments
 (0)