@@ -1346,104 +1346,20 @@ def str_split(arr, pat=None, n=None):
1346
1346
"""
1347
1347
Split strings around given separator/delimiter.
1348
1348
1349
- Split each string in the caller's values by given
1350
- pattern, propagating NaN values. Equivalent to :meth:`str.split`.
1351
-
1352
1349
Parameters
1353
1350
----------
1354
1351
pat : str, optional
1355
- String or regular expression to split on.
1356
- If not specified, split on whitespace.
1352
+ String or regular expression to split on. If not specified,
1353
+ split on whitespace.
1357
1354
n : int, default -1 (all)
1358
- Limit number of splits in output.
1359
- ``None``, 0 and -1 will be interpreted as return all splits.
1355
+ Limit number of splits in output; ``None``, 0 and -1 will
1356
+ be interpreted as return all splits.
1360
1357
expand : bool, default False
1361
1358
Expand the split strings into separate columns.
1362
1359
1363
- * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
1364
- * If ``False``, return Series/Index, containing lists of strings.
1365
-
1366
1360
Returns
1367
1361
-------
1368
- Series, Index, DataFrame or MultiIndex
1369
- Type matches caller unless ``expand=True`` (see Notes).
1370
-
1371
- Notes
1372
- -----
1373
- The handling of the `n` keyword depends on the number of found splits:
1374
-
1375
- - If found splits > `n`, make first `n` splits only
1376
- - If found splits <= `n`, make all splits
1377
- - If for a certain row the number of found splits < `n`,
1378
- append `None` for padding up to `n` if ``expand=True``
1379
-
1380
- If using ``expand=True``, Series and Index callers return DataFrame and
1381
- MultiIndex objects, respectively.
1382
-
1383
- See Also
1384
- --------
1385
- str.split : Standard library version of this method.
1386
- Series.str.get_dummies : Split each string into dummy variables.
1387
- Series.str.partition : Split string on a separator, returning
1388
- the before, separator, and after components.
1389
-
1390
- Examples
1391
- --------
1392
- >>> s = pd.Series(["this is good text", "but this is even better"])
1393
-
1394
- By default, split will return an object of the same size
1395
- having lists containing the split elements
1396
-
1397
- >>> s.str.split()
1398
- 0 [this, is, good, text]
1399
- 1 [but, this, is, even, better]
1400
- dtype: object
1401
- >>> s.str.split("random")
1402
- 0 [this is good text]
1403
- 1 [but this is even better]
1404
- dtype: object
1405
-
1406
- When using ``expand=True``, the split elements will expand out into
1407
- separate columns.
1408
-
1409
- For Series object, output return type is DataFrame.
1410
-
1411
- >>> s.str.split(expand=True)
1412
- 0 1 2 3 4
1413
- 0 this is good text None
1414
- 1 but this is even better
1415
- >>> s.str.split(" is ", expand=True)
1416
- 0 1
1417
- 0 this good text
1418
- 1 but this even better
1419
-
1420
- For Index object, output return type is MultiIndex.
1421
-
1422
- >>> i = pd.Index(["ba 100 001", "ba 101 002", "ba 102 003"])
1423
- >>> i.str.split(expand=True)
1424
- MultiIndex(levels=[['ba'], ['100', '101', '102'], ['001', '002', '003']],
1425
- labels=[[0, 0, 0], [0, 1, 2], [0, 1, 2]])
1426
-
1427
- Parameter `n` can be used to limit the number of splits in the output.
1428
-
1429
- >>> s.str.split("is", n=1)
1430
- 0 [th, is good text]
1431
- 1 [but th, is even better]
1432
- dtype: object
1433
- >>> s.str.split("is", n=1, expand=True)
1434
- 0 1
1435
- 0 th is good text
1436
- 1 but th is even better
1437
-
1438
- If NaN is present, it is propagated throughout the columns
1439
- during the split.
1440
-
1441
- >>> s = pd.Series(["this is good text", "but this is even better", np.nan])
1442
- >>> s.str.split(n=3, expand=True)
1443
- 0 1 2 3
1444
- 0 this is good text
1445
- 1 but this is even better
1446
- 2 NaN NaN NaN NaN
1362
+ Series, Index, DataFrame or MultiIndex
1447
1363
"""
1448
1364
if pat is None :
1449
1365
if n is None or n == 0 :
@@ -1465,16 +1381,13 @@ def str_split(arr, pat=None, n=None):
1465
1381
1466
1382
def str_rsplit (arr , pat = None , n = None ):
1467
1383
"""
1468
- Split strings around given separator/delimiter.
1469
-
1470
- Returns a list of the words from each string in
1471
- Series/Index, separated by the delimiter string
1472
- (starting from the right). Equivalent to :meth:`str.rsplit`.
1384
+ Split strings around given separator/delimiter (starting from
1385
+ the right).
1473
1386
1474
1387
Parameters
1475
1388
----------
1476
1389
pat : string, default None
1477
- Separator to split on. If None, splits on whitespace.
1390
+ Separator to split on. If None, splits on whitespace.
1478
1391
n : int, default -1 (all)
1479
1392
None, 0 and -1 will be interpreted as return all splits.
1480
1393
expand : bool, default False
@@ -1483,54 +1396,7 @@ def str_rsplit(arr, pat=None, n=None):
1483
1396
1484
1397
Returns
1485
1398
-------
1486
- Series/Index or DataFrame/MultiIndex of objects
1487
-
1488
- See Also
1489
- --------
1490
- str.rsplit : Standard library version of this method.
1491
-
1492
- Examples
1493
- --------
1494
- >>> s = pd.Series(["this is good text", "but this is even better"])
1495
-
1496
- By default, split will return an object of the same size
1497
- having lists containing the split elements
1498
-
1499
- >>> s.str.rsplit()
1500
- 0 [this, is, good, text]
1501
- 1 [but, this, is, even, better]
1502
- dtype: object
1503
- >>> s.str.rsplit("random")
1504
- 0 [this is good text]
1505
- 1 [but this is even better]
1506
- dtype: object
1507
-
1508
- When using ''expand=True'', the split elements will expand out into
1509
- separate columns.
1510
-
1511
- For Series object, output return type is DataFrame.
1512
-
1513
- >>> s.str.rsplit(expand=True)
1514
- 0 1 2 3 4
1515
- 0 this is good text None
1516
- 1 but this is even better
1517
-
1518
- Parameter 'n' can be used to limit the number of splits in the output.
1519
-
1520
- >>> s.str.rsplit("is", n=1)
1521
- 0 [this , good text]
1522
- 1 [but this , even better]
1523
- dtype: object
1524
-
1525
- If NaN is present, it is propagated throughout the columns
1526
- during the split.
1527
-
1528
- >>> s = pd.Series(["this is good text", "but this is even better", np.nan])
1529
- >>> s.str.rsplit(n=3, expand=True)
1530
- 0 1 2 3
1531
- 0 this is good text
1532
- 1 but this is even better
1533
- 2 NaN NaN NaN NaN
1399
+ Series/Index or DataFrame/MultiIndex of objects
1534
1400
"""
1535
1401
if n is None or n == 0 :
1536
1402
n = - 1
@@ -2374,12 +2240,128 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
2374
2240
res = Series (res , index = data .index , name = self ._orig .name )
2375
2241
return res
2376
2242
2377
- @copy (str_split )
2243
# Docstring template used for both ``split`` and ``rsplit``. It is filled in
# with ``%`` formatting and contains three named placeholders -- %(side)s,
# %(method)s and %(also)s -- so the mapping supplied at each use site must
# provide all three keys; a missing key raises KeyError during formatting.
+ _shared_docs ['str_split' ] = ("""
2244
+ Split strings around given separator/delimiter.
2245
+
2246
+ Returns a list of the words from each string in Series/Index,
2247
+ split by the given delimiter string, starting at the %(side)s of the
2248
+ string. Equivalent to :meth:`str.%(method)s`.
2249
+
2250
+ Parameters
2251
+ ----------
2252
+ pat : str, optional
2253
+ String or regular expression to split on.
2254
+ If not specified, split on whitespace.
2255
+ n : int, default -1 (all)
2256
+ Limit number of splits in output.
2257
+ ``None``, 0 and -1 will be interpreted as return all splits.
2258
+ expand : bool, default False
2259
+ Expand the splitted strings into separate columns.
2260
+
2261
+ * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
2262
+ * If ``False``, return Series/Index, containing lists of strings.
2263
+
2264
+ Returns
2265
+ -------
2266
+ Series, Index, DataFrame or MultiIndex
2267
+ Type matches caller unless ``expand=True`` (see Notes).
2268
+
2269
+ Notes
2270
+ -----
2271
+ The handling of the `n` keyword depends on the number of found splits:
2272
+
2273
+ - If found splits > `n`, make first `n` splits only
2274
+ - If found splits <= `n`, make all splits
2275
+ - If for a certain row the number of found splits < `n`,
2276
+ append `None` for padding up to `n` if ``expand=True``
2277
+
2278
+ If using ``expand=True``, Series and Index callers return DataFrame and
2279
+ MultiIndex objects, respectively.
2280
+
2281
+ See Also
2282
+ --------
2283
+ %(also)s
2284
+
2285
+ Examples
2286
+ --------
2287
+ >>> s = pd.Series(["this is good text", "but this is even better"])
2288
+
2289
+ By default, split and rsplit will return an object of the same size
2290
+ having lists containing the split elements
2291
+
2292
+ >>> s.str.split()
2293
+ 0 [this, is, good, text]
2294
+ 1 [but, this, is, even, better]
2295
+ dtype: object
2296
+
2297
+ >>> s.str.rsplit()
2298
+ 0 [this, is, good, text]
2299
+ 1 [but, this, is, even, better]
2300
+ dtype: object
2301
+
2302
+ >>> s.str.split("random")
2303
+ 0 [this is good text]
2304
+ 1 [but this is even better]
2305
+ dtype: object
2306
+
2307
+ >>> s.str.rsplit("random")
2308
+ 0 [this is good text]
2309
+ 1 [but this is even better]
2310
+ dtype: object
2311
+
2312
+ When using ``expand=True``, the split and rsplit elements will expand out into
2313
+ separate columns.
2314
+
2315
+ For Series object, output return type is DataFrame.
2316
+
2317
+ >>> s.str.split(expand=True)
2318
+ 0 1 2 3 4
2319
+ 0 this is good text None
2320
+ 1 but this is even better
2321
+
2322
+ >>> s.str.split(" is ", expand=True)
2323
+ 0 1
2324
+ 0 this good text
2325
+ 1 but this even better
2326
+
2327
+ Parameter `n` can be used to limit the number of splits in the output.
2328
+
2329
+ >>> s.str.split("is", n=1)
2330
+ 0 [th, is good text]
2331
+ 1 [but th, is even better]
2332
+ dtype: object
2333
+
2334
+ >>> s.str.rsplit("is", n=1)
2335
+ 0 [this , good text]
2336
+ 1 [but this , even better]
2337
+ dtype: object
2338
+
2339
+ If NaN is present, it is propagated throughout the columns
2340
+ during the split.
2341
+
2342
+ >>> s = pd.Series(["this is good text", "but this is even better", np.nan])
2343
+
2344
+ >>> s.str.split(n=3, expand=True)
2345
+ 0 1 2 3
2346
+ 0 this is good text
2347
+ 1 but this is even better
2348
+ 2 NaN NaN NaN NaN
2349
+
2350
+ >>> s.str.rsplit(n=3, expand=True)
2351
+ 0 1 2 3
2352
+ 0 this is good text
2353
+ 1 but this is even better
2354
+ 2 NaN NaN NaN NaN
2355
+ """ )
2356
+
2357
@Appender(_shared_docs['str_split'] % dict(
    side='start',
    method='split',
    # The shared template also contains a ``%(also)s`` placeholder (its
    # "See Also" section); leaving that key out makes the ``%`` formatting
    # raise ``KeyError: 'also'`` while the class body is being executed.
    # NOTE(review): the continuation lines assume the template text carries
    # a 4-space margin -- confirm against the template's real indentation.
    also=('str.split : Standard library version of this method.\n'
          '    Series.str.get_dummies : Split each string into dummy '
          'variables.\n'
          '    Series.str.partition : Split string on a separator, '
          'returning\n'
          '        the before, separator, and after components.')))
def split(self, pat=None, n=-1, expand=False):
    # No literal docstring is written here: the documentation is supplied by
    # the decorator above from the shared ``str_split`` template.
    # Split the wrapped data with the module-level helper, then pass the raw
    # result to ``_wrap_result`` together with the caller's ``expand`` flag.
    result = str_split(self._data, pat, n=n)
    return self._wrap_result(result, expand=expand)
2381
2362
2382
- @copy (str_rsplit )
2363
@Appender(_shared_docs['str_split'] % dict(
    side='end',
    method='rsplit',
    # The shared template also contains a ``%(also)s`` placeholder (its
    # "See Also" section); leaving that key out makes the ``%`` formatting
    # raise ``KeyError: 'also'`` while the class body is being executed.
    also='str.rsplit : Standard library version of this method.'))
def rsplit(self, pat=None, n=-1, expand=False):
    # No literal docstring is written here: the documentation is supplied by
    # the decorator above from the shared ``str_split`` template.
    # Split the wrapped data from the right with the module-level helper,
    # then pass the raw result to ``_wrap_result`` with the ``expand`` flag.
    result = str_rsplit(self._data, pat, n=n)
    return self._wrap_result(result, expand=expand)
0 commit comments