@@ -1571,162 +1571,6 @@ def test_categorical_codes():
1571
1571
assert_type (cat .codes , "np_ndarray_int" )
1572
1572
1573
1573
1574
- def test_string_accessors ():
1575
- s = pd .Series (
1576
- ["applep" , "bananap" , "Cherryp" , "DATEp" , "eGGpLANTp" , "123p" , "23.45p" ]
1577
- )
1578
- s2 = pd .Series ([["apple" , "banana" ], ["cherry" , "date" ], [1 , "eggplant" ]])
1579
- s3 = pd .Series (["a1" , "b2" , "c3" ])
1580
- s4 = pd .Series ([b"a1" , b"b2" , b"c3" ])
1581
- check (assert_type (s .str .capitalize (), "pd.Series[str]" ), pd .Series , str )
1582
- check (assert_type (s .str .casefold (), "pd.Series[str]" ), pd .Series , str )
1583
- check (assert_type (s .str .cat (sep = "X" ), str ), str )
1584
- check (assert_type (s .str .center (10 ), "pd.Series[str]" ), pd .Series , str )
1585
- check (assert_type (s .str .contains ("a" ), "pd.Series[bool]" ), pd .Series , np .bool_ )
1586
- check (
1587
- assert_type (s .str .contains (re .compile (r"a" )), "pd.Series[bool]" ),
1588
- pd .Series ,
1589
- np .bool_ ,
1590
- )
1591
- check (assert_type (s .str .count ("pp" ), "pd.Series[int]" ), pd .Series , np .integer )
1592
- check (assert_type (s4 .str .decode ("utf-8" ), "pd.Series[str]" ), pd .Series , str )
1593
- check (assert_type (s .str .encode ("latin-1" ), "pd.Series[bytes]" ), pd .Series , bytes )
1594
- check (assert_type (s .str .endswith ("e" ), "pd.Series[bool]" ), pd .Series , np .bool_ )
1595
- check (
1596
- assert_type (s .str .endswith (("e" , "f" )), "pd.Series[bool]" ), pd .Series , np .bool_
1597
- )
1598
- check (assert_type (s3 .str .extract (r"([ab])?(\d)" ), pd .DataFrame ), pd .DataFrame )
1599
- check (assert_type (s3 .str .extractall (r"([ab])?(\d)" ), pd .DataFrame ), pd .DataFrame )
1600
- check (assert_type (s .str .find ("p" ), "pd.Series[int]" ), pd .Series , np .int64 )
1601
- check (assert_type (s .str .findall ("pp" ), "pd.Series[list[str]]" ), pd .Series , list )
1602
- check (assert_type (s .str .fullmatch ("apple" ), "pd.Series[bool]" ), pd .Series , np .bool_ )
1603
- check (assert_type (s .str .get (2 ), "pd.Series[str]" ), pd .Series , str )
1604
- check (assert_type (s .str .get_dummies (), pd .DataFrame ), pd .DataFrame )
1605
- check (assert_type (s .str .index ("p" ), "pd.Series[int]" ), pd .Series , np .int64 )
1606
- check (assert_type (s .str .isalnum (), "pd.Series[bool]" ), pd .Series , np .bool_ )
1607
- check (assert_type (s .str .isalpha (), "pd.Series[bool]" ), pd .Series , np .bool_ )
1608
- check (assert_type (s .str .isdecimal (), "pd.Series[bool]" ), pd .Series , np .bool_ )
1609
- check (assert_type (s .str .isdigit (), "pd.Series[bool]" ), pd .Series , np .bool_ )
1610
- check (assert_type (s .str .isnumeric (), "pd.Series[bool]" ), pd .Series , np .bool_ )
1611
- check (assert_type (s .str .islower (), "pd.Series[bool]" ), pd .Series , np .bool_ )
1612
- check (assert_type (s .str .isspace (), "pd.Series[bool]" ), pd .Series , np .bool_ )
1613
- check (assert_type (s .str .istitle (), "pd.Series[bool]" ), pd .Series , np .bool_ )
1614
- check (assert_type (s .str .isupper (), "pd.Series[bool]" ), pd .Series , np .bool_ )
1615
- check (assert_type (s2 .str .join ("-" ), pd .Series ), pd .Series )
1616
- check (assert_type (s .str .len (), "pd.Series[int]" ), pd .Series , np .integer )
1617
- check (assert_type (s .str .ljust (80 ), "pd.Series[str]" ), pd .Series , str )
1618
- check (assert_type (s .str .lower (), "pd.Series[str]" ), pd .Series , str )
1619
- check (assert_type (s .str .lstrip ("a" ), "pd.Series[str]" ), pd .Series , str )
1620
- check (assert_type (s .str .match ("pp" ), "pd.Series[bool]" ), pd .Series , np .bool_ )
1621
- check (assert_type (s .str .normalize ("NFD" ), "pd.Series[str]" ), pd .Series , str )
1622
- check (assert_type (s .str .pad (80 , "right" ), "pd.Series[str]" ), pd .Series , str )
1623
- check (assert_type (s .str .partition ("p" ), pd .DataFrame ), pd .DataFrame )
1624
- check (assert_type (s .str .removeprefix ("a" ), "pd.Series[str]" ), pd .Series , str )
1625
- check (assert_type (s .str .removesuffix ("e" ), "pd.Series[str]" ), pd .Series , str )
1626
- check (assert_type (s .str .repeat (2 ), "pd.Series[str]" ), pd .Series , str )
1627
- check (assert_type (s .str .replace ("a" , "X" ), "pd.Series[str]" ), pd .Series , str )
1628
- check (assert_type (s .str .rfind ("e" ), "pd.Series[int]" ), pd .Series , np .int64 )
1629
- check (assert_type (s .str .rindex ("p" ), "pd.Series[int]" ), pd .Series , np .int64 )
1630
- check (assert_type (s .str .rjust (80 ), "pd.Series[str]" ), pd .Series , str )
1631
- check (assert_type (s .str .rpartition ("p" ), pd .DataFrame ), pd .DataFrame )
1632
- check (assert_type (s .str .rsplit ("a" ), "pd.Series[list[str]]" ), pd .Series , list )
1633
- check (assert_type (s .str .rsplit ("a" , expand = True ), pd .DataFrame ), pd .DataFrame )
1634
- check (
1635
- assert_type (s .str .rsplit ("a" , expand = False ), "pd.Series[list[str]]" ),
1636
- pd .Series ,
1637
- list ,
1638
- )
1639
- check (assert_type (s .str .rstrip (), "pd.Series[str]" ), pd .Series , str )
1640
- check (assert_type (s .str .slice (0 , 4 , 2 ), "pd.Series[str]" ), pd .Series , str )
1641
- check (
1642
- assert_type (s .str .slice_replace (0 , 2 , "XX" ), "pd.Series[str]" ), pd .Series , str
1643
- )
1644
- check (assert_type (s .str .split ("a" ), "pd.Series[list[str]]" ), pd .Series , list )
1645
- # GH 194
1646
- check (assert_type (s .str .split ("a" , expand = True ), pd .DataFrame ), pd .DataFrame )
1647
- check (
1648
- assert_type (s .str .split ("a" , expand = False ), "pd.Series[list[str]]" ),
1649
- pd .Series ,
1650
- list ,
1651
- )
1652
- check (assert_type (s .str .startswith ("a" ), "pd.Series[bool]" ), pd .Series , np .bool_ )
1653
- check (
1654
- assert_type (s .str .startswith (("a" , "b" )), "pd.Series[bool]" ),
1655
- pd .Series ,
1656
- np .bool_ ,
1657
- )
1658
- check (assert_type (s .str .strip (), "pd.Series[str]" ), pd .Series , str )
1659
- check (assert_type (s .str .swapcase (), "pd.Series[str]" ), pd .Series , str )
1660
- check (assert_type (s .str .title (), "pd.Series[str]" ), pd .Series , str )
1661
- check (
1662
- assert_type (s .str .translate ({241 : "n" }), "pd.Series[str]" ),
1663
- pd .Series ,
1664
- str ,
1665
- )
1666
- check (assert_type (s .str .upper (), "pd.Series[str]" ), pd .Series , str )
1667
- check (assert_type (s .str .wrap (80 ), "pd.Series[str]" ), pd .Series , str )
1668
- check (assert_type (s .str .zfill (10 ), "pd.Series[str]" ), pd .Series , str )
1669
-
1670
-
1671
- def test_series_overloads_cat ():
1672
- s = pd .Series (
1673
- ["applep" , "bananap" , "Cherryp" , "DATEp" , "eGGpLANTp" , "123p" , "23.45p" ]
1674
- )
1675
- check (assert_type (s .str .cat (sep = ";" ), str ), str )
1676
- check (assert_type (s .str .cat (None , sep = ";" ), str ), str )
1677
- check (
1678
- assert_type (
1679
- s .str .cat (["A" , "B" , "C" , "D" , "E" , "F" , "G" ], sep = ";" ),
1680
- "pd.Series[str]" ,
1681
- ),
1682
- pd .Series ,
1683
- str ,
1684
- )
1685
- check (
1686
- assert_type (
1687
- s .str .cat (pd .Series (["A" , "B" , "C" , "D" , "E" , "F" , "G" ]), sep = ";" ),
1688
- "pd.Series[str]" ,
1689
- ),
1690
- pd .Series ,
1691
- str ,
1692
- )
1693
- unknown_s : UnknownSeries = pd .DataFrame ({"a" : ["a" , "b" ]})["a" ]
1694
- check (assert_type (s .str .cat (unknown_s , sep = ";" ), "pd.Series[str]" ), pd .Series , str )
1695
-
1696
-
1697
- def test_series_overloads_partition ():
1698
- s = pd .Series (
1699
- [
1700
- "ap;pl;ep" ,
1701
- "ban;an;ap" ,
1702
- "Che;rr;yp" ,
1703
- "DA;TEp" ,
1704
- "eGGp;LANT;p" ,
1705
- "12;3p" ,
1706
- "23.45p" ,
1707
- ]
1708
- )
1709
- check (assert_type (s .str .partition (sep = ";" ), pd .DataFrame ), pd .DataFrame )
1710
- check (
1711
- assert_type (s .str .partition (sep = ";" , expand = True ), pd .DataFrame ), pd .DataFrame
1712
- )
1713
- check (
1714
- assert_type (s .str .partition (sep = ";" , expand = False ), "pd.Series[type[object]]" ),
1715
- pd .Series ,
1716
- object ,
1717
- )
1718
-
1719
- check (assert_type (s .str .rpartition (sep = ";" ), pd .DataFrame ), pd .DataFrame )
1720
- check (
1721
- assert_type (s .str .rpartition (sep = ";" , expand = True ), pd .DataFrame ), pd .DataFrame
1722
- )
1723
- check (
1724
- assert_type (s .str .rpartition (sep = ";" , expand = False ), "pd.Series[type[object]]" ),
1725
- pd .Series ,
1726
- object ,
1727
- )
1728
-
1729
-
1730
1574
def test_series_overloads_extract ():
1731
1575
s = pd .Series (
1732
1576
["appl;ep" , "ban;anap" , "Cherr;yp" , "DATEp" , "eGGp;LANTp" , "12;3p" , "23.45p" ]
0 commit comments