@@ -676,6 +676,7 @@ def test_empty_str_methods(self):
676
676
tm .assert_series_equal (empty_str , empty .str .pad (42 ))
677
677
tm .assert_series_equal (empty_str , empty .str .center (42 ))
678
678
tm .assert_series_equal (empty_list , empty .str .split ('a' ))
679
+ tm .assert_series_equal (empty_list , empty .str .rsplit ('a' ))
679
680
tm .assert_series_equal (empty_list , empty .str .partition ('a' , expand = False ))
680
681
tm .assert_series_equal (empty_list , empty .str .rpartition ('a' , expand = False ))
681
682
tm .assert_series_equal (empty_str , empty .str .slice (stop = 1 ))
@@ -1212,15 +1213,15 @@ def test_split(self):
1212
1213
# mixed
1213
1214
mixed = Series (['a_b_c' , NA , 'd_e_f' , True , datetime .today (),
1214
1215
None , 1 , 2. ])
1215
- rs = mixed .str .split ('_' )
1216
- xp = Series ([['a' , 'b' , 'c' ], NA , ['d' , 'e' , 'f' ], NA , NA ,
1216
+ result = mixed .str .split ('_' )
1217
+ exp = Series ([['a' , 'b' , 'c' ], NA , ['d' , 'e' , 'f' ], NA , NA ,
1217
1218
NA , NA , NA ])
1218
- tm .assert_isinstance (rs , Series )
1219
- tm .assert_almost_equal (rs , xp )
1219
+ tm .assert_isinstance (result , Series )
1220
+ tm .assert_almost_equal (result , exp )
1220
1221
1221
- rs = mixed .str .split ('_' , expand = False )
1222
- tm .assert_isinstance (rs , Series )
1223
- tm .assert_almost_equal (rs , xp )
1222
+ result = mixed .str .split ('_' , expand = False )
1223
+ tm .assert_isinstance (result , Series )
1224
+ tm .assert_almost_equal (result , exp )
1224
1225
1225
1226
# unicode
1226
1227
values = Series ([u ('a_b_c' ), u ('c_d_e' ), NA , u ('f_g_h' )])
@@ -1234,12 +1235,75 @@ def test_split(self):
1234
1235
result = values .str .split ('_' , expand = False )
1235
1236
tm .assert_series_equal (result , exp )
1236
1237
1238
+ # regex split
1239
+ values = Series ([u ('a,b_c' ), u ('c_d,e' ), NA , u ('f,g,h' )])
1240
+ result = values .str .split ('[,_]' )
1241
+ exp = Series ([[u ('a' ), u ('b' ), u ('c' )],
1242
+ [u ('c' ), u ('d' ), u ('e' )], NA ,
1243
+ [u ('f' ), u ('g' ), u ('h' )]])
1244
+ tm .assert_series_equal (result , exp )
1245
+
1246
def test_rsplit(self):
    # Checks Series.str.rsplit on plain, multi-character-separator,
    # mixed-type, unicode, regex-looking and split-limited inputs.
    triples = Series([['a', 'b', 'c'],
                      ['c', 'd', 'e'],
                      NA,
                      ['f', 'g', 'h']])
    single_sep = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])
    tm.assert_series_equal(single_sep.str.rsplit('_'), triples)

    # more than one char in the separator; same pieces are expected
    double_sep = Series(['a__b__c', 'c__d__e', NA, 'f__g__h'])
    tm.assert_series_equal(double_sep.str.rsplit('__'), triples)
    tm.assert_series_equal(double_sep.str.rsplit('__', expand=False),
                           triples)

    # mixed: only the str entries are expected to be split, every other
    # entry is expected to come back as NA
    mixed = Series(['a_b_c', NA, 'd_e_f', True, datetime.today(),
                    None, 1, 2.])
    mixed_exp = Series([['a', 'b', 'c'], NA, ['d', 'e', 'f'], NA, NA,
                        NA, NA, NA])
    for kwargs in ({}, {'expand': False}):
        got = mixed.str.rsplit('_', **kwargs)
        tm.assert_isinstance(got, Series)
        tm.assert_almost_equal(got, mixed_exp)

    # unicode
    uni = Series([u('a_b_c'), u('c_d_e'), NA, u('f_g_h')])
    uni_exp = Series([[u('a'), u('b'), u('c')],
                      [u('c'), u('d'), u('e')],
                      NA,
                      [u('f'), u('g'), u('h')]])
    tm.assert_series_equal(uni.str.rsplit('_'), uni_exp)
    tm.assert_series_equal(uni.str.rsplit('_', expand=False), uni_exp)

    # regex split is not supported by rsplit: the pattern is expected to
    # be taken literally, so each string comes back as a one-item list
    regex_like = Series([u('a,b_c'), u('c_d,e'), NA, u('f,g,h')])
    unsplit = Series([[u('a,b_c')],
                      [u('c_d,e')],
                      NA,
                      [u('f,g,h')]])
    tm.assert_series_equal(regex_like.str.rsplit('[,_]'), unsplit)

    # setting max number of splits, make sure it's from reverse
    limited_in = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])
    limited_exp = Series([['a_b', 'c'], ['c_d', 'e'], NA, ['f_g', 'h']])
    tm.assert_series_equal(limited_in.str.rsplit('_', n=1), limited_exp)
1298
+
1237
1299
def test_split_noargs (self ):
1238
1300
# #1859
1239
1301
s = Series (['Wes McKinney' , 'Travis Oliphant' ])
1240
-
1241
1302
result = s .str .split ()
1242
- self .assertEqual (result [1 ], ['Travis' , 'Oliphant' ])
1303
+ expected = ['Travis' , 'Oliphant' ]
1304
+ self .assertEqual (result [1 ], expected )
1305
+ result = s .str .rsplit ()
1306
+ self .assertEqual (result [1 ], expected )
1243
1307
1244
1308
def test_split_maxsplit (self ):
1245
1309
# re.split 0, str.split -1
@@ -1348,6 +1412,55 @@ def test_split_to_multiindex_expand(self):
1348
1412
with tm .assertRaisesRegexp (ValueError , "expand must be" ):
1349
1413
idx .str .split ('_' , return_type = "some_invalid_type" )
1350
1414
1415
+ def test_rsplit_to_dataframe_expand (self ):
1416
+ s = Series (['nosplit' , 'alsonosplit' ])
1417
+ result = s .str .rsplit ('_' , expand = True )
1418
+ exp = DataFrame ({0 : Series (['nosplit' , 'alsonosplit' ])})
1419
+ tm .assert_frame_equal (result , exp )
1420
+
1421
+ s = Series (['some_equal_splits' , 'with_no_nans' ])
1422
+ result = s .str .rsplit ('_' , expand = True )
1423
+ exp = DataFrame ({0 : ['some' , 'with' ], 1 : ['equal' , 'no' ],
1424
+ 2 : ['splits' , 'nans' ]})
1425
+ tm .assert_frame_equal (result , exp )
1426
+
1427
+ result = s .str .rsplit ('_' , expand = True , n = 2 )
1428
+ exp = DataFrame ({0 : ['some' , 'with' ], 1 : ['equal' , 'no' ],
1429
+ 2 : ['splits' , 'nans' ]})
1430
+ tm .assert_frame_equal (result , exp )
1431
+
1432
+ result = s .str .rsplit ('_' , expand = True , n = 1 )
1433
+ exp = DataFrame ({0 : ['some_equal' , 'with_no' ],
1434
+ 1 : ['splits' , 'nans' ]})
1435
+ tm .assert_frame_equal (result , exp )
1436
+
1437
+ s = Series (['some_splits' , 'with_index' ], index = ['preserve' , 'me' ])
1438
+ result = s .str .rsplit ('_' , expand = True )
1439
+ exp = DataFrame ({0 : ['some' , 'with' ], 1 : ['splits' , 'index' ]},
1440
+ index = ['preserve' , 'me' ])
1441
+ tm .assert_frame_equal (result , exp )
1442
+
1443
def test_rsplit_to_multiindex_expand(self):
    # Checks that Index.str.rsplit(..., expand=True) yields an index with
    # one level per piece.

    # separator absent: a single level is expected
    unsplit = Index(['nosplit', 'alsonosplit'])
    one_level = Index([np.array(['nosplit']), np.array(['alsonosplit'])])
    got = unsplit.str.rsplit('_', expand=True)
    tm.assert_index_equal(got, one_level)
    self.assertEqual(got.nlevels, 1)

    # full split: three levels
    idx = Index(['some_equal_splits', 'with_no_nans'])
    three_levels = MultiIndex.from_tuples([('some', 'equal', 'splits'),
                                           ('with', 'no', 'nans')])
    got = idx.str.rsplit('_', expand=True)
    tm.assert_index_equal(got, three_levels)
    self.assertEqual(got.nlevels, 3)

    # one split, taken from the right: two levels
    idx = Index(['some_equal_splits', 'with_no_nans'])
    two_levels = MultiIndex.from_tuples([('some_equal', 'splits'),
                                         ('with_no', 'nans')])
    got = idx.str.rsplit('_', expand=True, n=1)
    tm.assert_index_equal(got, two_levels)
    self.assertEqual(got.nlevels, 2)
1463
+
1351
1464
def test_partition_series (self ):
1352
1465
values = Series (['a_b_c' , 'c_d_e' , NA , 'f_g_h' ])
1353
1466
0 commit comments