@@ -1343,108 +1343,7 @@ def str_pad(arr, width, side='left', fillchar=' '):
1343
1343
1344
1344
1345
1345
def str_split (arr , pat = None , n = None ):
1346
- """
1347
- Split strings around given separator/delimiter.
1348
-
1349
- Split each string in the caller's values by given
1350
- pattern, propagating NaN values. Equivalent to :meth:`str.split`.
1351
-
1352
- Parameters
1353
- ----------
1354
- pat : str, optional
1355
- String or regular expression to split on.
1356
- If not specified, split on whitespace.
1357
- n : int, default -1 (all)
1358
- Limit number of splits in output.
1359
- ``None``, 0 and -1 will be interpreted as return all splits.
1360
- expand : bool, default False
1361
- Expand the split strings into separate columns.
1362
-
1363
- * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
1364
- * If ``False``, return Series/Index, containing lists of strings.
1365
1346
1366
- Returns
1367
- -------
1368
- Series, Index, DataFrame or MultiIndex
1369
- Type matches caller unless ``expand=True`` (see Notes).
1370
-
1371
- Notes
1372
- -----
1373
- The handling of the `n` keyword depends on the number of found splits:
1374
-
1375
- - If found splits > `n`, make first `n` splits only
1376
- - If found splits <= `n`, make all splits
1377
- - If for a certain row the number of found splits < `n`,
1378
- append `None` for padding up to `n` if ``expand=True``
1379
-
1380
- If using ``expand=True``, Series and Index callers return DataFrame and
1381
- MultiIndex objects, respectively.
1382
-
1383
- See Also
1384
- --------
1385
- str.split : Standard library version of this method.
1386
- Series.str.get_dummies : Split each string into dummy variables.
1387
- Series.str.partition : Split string on a separator, returning
1388
- the before, separator, and after components.
1389
-
1390
- Examples
1391
- --------
1392
- >>> s = pd.Series(["this is good text", "but this is even better"])
1393
-
1394
- By default, split will return an object of the same size
1395
- having lists containing the split elements
1396
-
1397
- >>> s.str.split()
1398
- 0 [this, is, good, text]
1399
- 1 [but, this, is, even, better]
1400
- dtype: object
1401
- >>> s.str.split("random")
1402
- 0 [this is good text]
1403
- 1 [but this is even better]
1404
- dtype: object
1405
-
1406
- When using ``expand=True``, the split elements will expand out into
1407
- separate columns.
1408
-
1409
- For Series object, output return type is DataFrame.
1410
-
1411
- >>> s.str.split(expand=True)
1412
- 0 1 2 3 4
1413
- 0 this is good text None
1414
- 1 but this is even better
1415
- >>> s.str.split(" is ", expand=True)
1416
- 0 1
1417
- 0 this good text
1418
- 1 but this even better
1419
-
1420
- For Index object, output return type is MultiIndex.
1421
-
1422
- >>> i = pd.Index(["ba 100 001", "ba 101 002", "ba 102 003"])
1423
- >>> i.str.split(expand=True)
1424
- MultiIndex(levels=[['ba'], ['100', '101', '102'], ['001', '002', '003']],
1425
- labels=[[0, 0, 0], [0, 1, 2], [0, 1, 2]])
1426
-
1427
- Parameter `n` can be used to limit the number of splits in the output.
1428
-
1429
- >>> s.str.split("is", n=1)
1430
- 0 [th, is good text]
1431
- 1 [but th, is even better]
1432
- dtype: object
1433
- >>> s.str.split("is", n=1, expand=True)
1434
- 0 1
1435
- 0 th is good text
1436
- 1 but th is even better
1437
-
1438
- If NaN is present, it is propagated throughout the columns
1439
- during the split.
1440
-
1441
- >>> s = pd.Series(["this is good text", "but this is even better", np.nan])
1442
- >>> s.str.split(n=3, expand=True)
1443
- 0 1 2 3
1444
- 0 this is good text
1445
- 1 but this is even better
1446
- 2 NaN NaN NaN NaN
1447
- """
1448
1347
if pat is None :
1449
1348
if n is None or n == 0 :
1450
1349
n = - 1
@@ -1464,25 +1363,7 @@ def str_split(arr, pat=None, n=None):
1464
1363
1465
1364
1466
1365
def str_rsplit (arr , pat = None , n = None ):
1467
- """
1468
- Split each string in the Series/Index by the given delimiter
1469
- string, starting at the end of the string and working to the front.
1470
- Equivalent to :meth:`str.rsplit`.
1471
1366
1472
- Parameters
1473
- ----------
1474
- pat : string, default None
1475
- Separator to split on. If None, splits on whitespace
1476
- n : int, default -1 (all)
1477
- None, 0 and -1 will be interpreted as return all splits
1478
- expand : bool, default False
1479
- * If True, return DataFrame/MultiIndex expanding dimensionality.
1480
- * If False, return Series/Index.
1481
-
1482
- Returns
1483
- -------
1484
- split : Series/Index or DataFrame/MultiIndex of objects
1485
- """
1486
1367
if n is None or n == 0 :
1487
1368
n = - 1
1488
1369
f = lambda x : x .rsplit (pat , n )
@@ -2325,12 +2206,133 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
2325
2206
res = Series (res , index = data .index , name = self ._orig .name )
2326
2207
return res
2327
2208
2328
- @copy (str_split )
2209
+ _shared_docs ['str_split' ] = ("""
2210
+ Split strings around given separator/delimiter.
2211
+
2212
+ Splits the string in the Series/Index from the %(side)s,
2213
+ at the specified delimiter string. Equivalent to :meth:`str.%(method)s`.
2214
+
2215
+ Parameters
2216
+ ----------
2217
+ pat : str, optional
2218
+ String or regular expression to split on.
2219
+ If not specified, split on whitespace.
2220
+ n : int, default -1 (all)
2221
+ Limit number of splits in output.
2222
+ ``None``, 0 and -1 will be interpreted as return all splits.
2223
+ expand : bool, default False
2224
+ Expand the split strings into separate columns.
2225
+
2226
+ * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
2227
+ * If ``False``, return Series/Index, containing lists of strings.
2228
+
2229
+ Returns
2230
+ -------
2231
+ Series, Index, DataFrame or MultiIndex
2232
+ Type matches caller unless ``expand=True`` (see Notes).
2233
+
2234
+ See Also
2235
+ --------
2236
+ Series.str.split : Split strings around given separator/delimiter.
2237
+ Series.str.rsplit : Splits string around given separator/delimiter,
2238
+ starting from the right.
2239
+ Series.str.join : Join lists contained as elements in the Series/Index
2240
+ with passed delimiter.
2241
+ str.split : Standard library version for split.
2242
+ str.rsplit : Standard library version for rsplit.
2243
+
2244
+ Notes
2245
+ -----
2246
+ The handling of the `n` keyword depends on the number of found splits:
2247
+
2248
+ - If found splits > `n`, make first `n` splits only
2249
+ - If found splits <= `n`, make all splits
2250
+ - If for a certain row the number of found splits < `n`,
2251
+ append `None` for padding up to `n` if ``expand=True``
2252
+
2253
+ If using ``expand=True``, Series and Index callers return DataFrame and
2254
+ MultiIndex objects, respectively.
2255
+
2256
+ Examples
2257
+ --------
2258
+ >>> s = pd.Series(["this is a regular sentence",
2259
+ ... "https://docs.python.org/3/tutorial/index.html", np.nan])
2260
+
2261
+ In the default setting, the string is split by whitespace.
2262
+
2263
+ >>> s.str.split()
2264
+ 0 [this, is, a, regular, sentence]
2265
+ 1 [https://docs.python.org/3/tutorial/index.html]
2266
+ 2 NaN
2267
+ dtype: object
2268
+
2269
+ Without the `n` parameter, the outputs of `rsplit` and `split`
2270
+ are identical.
2271
+
2272
+ >>> s.str.rsplit()
2273
+ 0 [this, is, a, regular, sentence]
2274
+ 1 [https://docs.python.org/3/tutorial/index.html]
2275
+ 2 NaN
2276
+ dtype: object
2277
+
2278
+ The `n` parameter can be used to limit the number of splits on the
2279
+ delimiter. The outputs of `split` and `rsplit` are different.
2280
+
2281
+ >>> s.str.split(n=2)
2282
+ 0 [this, is, a regular sentence]
2283
+ 1 [https://docs.python.org/3/tutorial/index.html]
2284
+ 2 NaN
2285
+ dtype: object
2286
+
2287
+ >>> s.str.rsplit(n=2)
2288
+ 0 [this is a, regular, sentence]
2289
+ 1 [https://docs.python.org/3/tutorial/index.html]
2290
+ 2 NaN
2291
+ dtype: object
2292
+
2293
+ The `pat` parameter can be used to split by other characters.
2294
+
2295
+ >>> s.str.split(pat="/")
2296
+ 0 [this is a regular sentence]
2297
+ 1 [https:, , docs.python.org, 3, tutorial, index...
2298
+ 2 NaN
2299
+ dtype: object
2300
+
2301
+ When using ``expand=True``, the split elements will expand out into
2302
+ separate columns. If NaN is present, it is propagated throughout
2303
+ the columns during the split.
2304
+
2305
+ >>> s.str.split(expand=True)
2306
+ 0 1 2 3
2307
+ 0 this is a regular
2308
+ 1 https://docs.python.org/3/tutorial/index.html None None None
2309
+ 2 NaN NaN NaN NaN \
2310
+
2311
+ 4
2312
+ 0 sentence
2313
+ 1 None
2314
+ 2 NaN
2315
+
2316
+ For slightly more complex use cases like splitting the HTML document name
2317
+ from a URL, a combination of parameter settings can be used.
2318
+
2319
+ >>> s.str.rsplit("/", n=1, expand=True)
2320
+ 0 1
2321
+ 0 this is a regular sentence None
2322
+ 1 https://docs.python.org/3/tutorial index.html
2323
+ 2 NaN NaN
2324
+ """ )
2325
+
2326
+ @Appender (_shared_docs ['str_split' ] % {
2327
+ 'side' : 'beginning' ,
2328
+ 'method' : 'split' })
2329
2329
def split (self , pat = None , n = - 1 , expand = False ):
2330
2330
result = str_split (self ._data , pat , n = n )
2331
2331
return self ._wrap_result (result , expand = expand )
2332
2332
2333
- @copy (str_rsplit )
2333
+ @Appender (_shared_docs ['str_split' ] % {
2334
+ 'side' : 'end' ,
2335
+ 'method' : 'rsplit' })
2334
2336
def rsplit (self , pat = None , n = - 1 , expand = False ):
2335
2337
result = str_rsplit (self ._data , pat , n = n )
2336
2338
return self ._wrap_result (result , expand = expand )
0 commit comments