@@ -516,7 +516,6 @@ def test_match(self):
516
516
517
517
def test_extract (self ):
518
518
# Contains tests like those in test_match and some others.
519
-
520
519
values = Series (['fooBAD__barBAD' , NA , 'foo' ])
521
520
er = [NA , NA ] # empty row
522
521
@@ -540,15 +539,30 @@ def test_extract(self):
540
539
exp = DataFrame ([[u ('BAD__' ), u ('BAD' )], er , er ])
541
540
tm .assert_frame_equal (result , exp )
542
541
543
- # no groups
544
- s = Series (['A1' , 'B2' , 'C3' ])
545
- f = lambda : s .str .extract ('[ABC][123]' )
546
- self .assertRaises (ValueError , f )
547
-
548
- # only non-capturing groups
549
- f = lambda : s .str .extract ('(?:[AB]).*' )
550
- self .assertRaises (ValueError , f )
542
+ # Index only works with one regex group since
543
+ # multi-group would expand to a frame
544
+ idx = Index (['A1' , 'A2' , 'A3' , 'A4' , 'B5' ])
545
+ with tm .assertRaisesRegexp (ValueError , "supported" ):
546
+ idx .str .extract ('([AB])([123])' )
547
+
548
+ # these should work for both Series and Index
549
+ for klass in [Series , Index ]:
550
+ # no groups
551
+ s_or_idx = klass (['A1' , 'B2' , 'C3' ])
552
+ f = lambda : s_or_idx .str .extract ('[ABC][123]' )
553
+ self .assertRaises (ValueError , f )
554
+
555
+ # only non-capturing groups
556
+ f = lambda : s_or_idx .str .extract ('(?:[AB]).*' )
557
+ self .assertRaises (ValueError , f )
558
+
559
+ # single group renames series/index properly
560
+ s_or_idx = klass (['A1' , 'A2' ])
561
+ result = s_or_idx .str .extract (r'(?P<uno>A)\d' )
562
+ tm .assert_equal (result .name , 'uno' )
563
+ tm .assert_array_equal (result , klass (['A' , 'A' ]))
551
564
565
+ s = Series (['A1' , 'B2' , 'C3' ])
552
566
# one group, no matches
553
567
result = s .str .extract ('(_)' )
554
568
exp = Series ([NA , NA , NA ], dtype = object )
@@ -569,14 +583,16 @@ def test_extract(self):
569
583
exp = DataFrame ([['A' , '1' ], ['B' , '2' ], [NA , NA ]])
570
584
tm .assert_frame_equal (result , exp )
571
585
572
- # named group/groups
573
- result = s .str .extract ('(?P<letter>[AB])(?P<number>[123])' )
574
- exp = DataFrame ([['A' , '1' ], ['B' , '2' ], [NA , NA ]], columns = ['letter' , 'number' ])
575
- tm .assert_frame_equal (result , exp )
586
+ # one named group
576
587
result = s .str .extract ('(?P<letter>[AB])' )
577
588
exp = Series (['A' , 'B' , NA ], name = 'letter' )
578
589
tm .assert_series_equal (result , exp )
579
590
591
+ # two named groups
592
+ result = s .str .extract ('(?P<letter>[AB])(?P<number>[123])' )
593
+ exp = DataFrame ([['A' , '1' ], ['B' , '2' ], [NA , NA ]], columns = ['letter' , 'number' ])
594
+ tm .assert_frame_equal (result , exp )
595
+
580
596
# mix named and unnamed groups
581
597
result = s .str .extract ('([AB])(?P<number>[123])' )
582
598
exp = DataFrame ([['A' , '1' ], ['B' , '2' ], [NA , NA ]], columns = [0 , 'number' ])
@@ -602,11 +618,6 @@ def test_extract(self):
602
618
exp = DataFrame ([['A' , '1' ], ['B' , '2' ], ['C' , NA ]], columns = ['letter' , 'number' ])
603
619
tm .assert_frame_equal (result , exp )
604
620
605
- # single group renames series properly
606
- s = Series (['A1' , 'A2' ])
607
- result = s .str .extract (r'(?P<uno>A)\d' )
608
- tm .assert_equal (result .name , 'uno' )
609
-
610
621
# GH6348
611
622
# not passing index to the extractor
612
623
def check_index (index ):
@@ -752,6 +763,12 @@ def test_get_dummies(self):
752
763
columns = list ('7ab' ))
753
764
tm .assert_frame_equal (result , expected )
754
765
766
+ # GH9980
767
+ # Index.str does not support get_dummies() as it returns a frame
768
+ with tm .assertRaisesRegexp (TypeError , "not supported" ):
769
+ idx = Index (['a|b' , 'a|c' , 'b|c' ])
770
+ idx .str .get_dummies ('|' )
771
+
755
772
def test_join (self ):
756
773
values = Series (['a_b_c' , 'c_d_e' , np .nan , 'f_g_h' ])
757
774
result = values .str .split ('_' ).str .join ('_' )
0 commit comments