@@ -4640,6 +4640,95 @@ def _reindex_with_indexers(
4640
4640
4641
4641
return self ._constructor (new_data ).__finalize__ (self )
4642
4642
4643
def select_str(
    self, *, startswith=None, endswith=None, regex=None, flags=0, axis=None
):
    """
    Select rows or columns according to string labels in the specified axis.

    Exactly one of the keyword arguments `startswith`, `endswith` and
    `regex` must be given.

    Parameters
    ----------
    startswith : str, optional
        Test if the start of each string element matches a pattern.
        Equivalent to :meth:`str.startswith`.
    endswith : str, optional
        Test if the end of each string element matches a pattern.
        Equivalent to :meth:`str.endswith`.
    regex : str, optional
        Keep labels from axis for which re.search(regex, label) is True.
    flags : int, default 0 (no flags)
        re module flags, e.g. re.IGNORECASE. Can only be used with
        parameter `regex`.
    axis : int or string axis name
        The axis to filter on. By default this is the info axis,
        'index' for Series, 'columns' for DataFrame.

    Returns
    -------
    same type as input object

    Raises
    ------
    TypeError
        If none, or more than one, of `startswith`, `endswith` and
        `regex` is given.
    ValueError
        If `flags` is non-zero while `regex` is not given.

    See Also
    --------
    DataFrame.loc
    DataFrame.select_dtypes

    Notes
    -----
    Only one of keywords arguments `startswith`, `endswith` and `regex`
    can be used.

    ``axis`` defaults to the info axis that is used when indexing
    with ``[]``.

    Examples
    --------
    >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])),
    ...                   index=['mouse', 'rabbit'],
    ...                   columns=['one', 'two', 'three'])

    >>> df.select_str(startswith='t')
            two  three
    mouse     2      3
    rabbit    5      6

    >>> # select columns by regular expression
    >>> df.select_str(regex=r'e$', axis=1)
            one  three
    mouse     1      3
    rabbit    4      6

    >>> # select rows containing 'bbi'
    >>> df.select_str(regex=r'bbi', axis=0)
            one  two  three
    rabbit    4    5      6
    """
    import re

    # Exactly one selection criterion must be supplied; zero or several
    # are both rejected with the same TypeError.
    num_kw = com.count_not_none(startswith, endswith, regex)
    if num_kw != 1:
        raise TypeError(
            "Only one of keywords arguments `startswith`, `endswith` and "
            "`regex` can be used."
        )
    # `flags` is only meaningful when a regular expression is compiled.
    if regex is None and flags != 0:
        raise ValueError(
            "Parameter 'flags' can only be used together with parameter 'regex'"
        )

    if axis is None:
        axis = self._info_axis_name
    labels = self._get_axis(axis)

    # Build a boolean mask over the labels of the chosen axis.
    if startswith is not None:
        mapped = labels.str.startswith(startswith)
    elif endswith is not None:
        # Fixed: the accessor method is `endswith`; the previous spelling
        # `endsswith` raised AttributeError on every call.
        mapped = labels.str.endswith(endswith)
    else:  # regex
        matcher = re.compile(regex, flags=flags)

        def f(x):
            return matcher.search(x) is not None

        mapped = labels.map(f)
    return self.loc(axis=axis)[mapped]
4731
+
4643
4732
def filter (self , items = None , like = None , regex = None , axis = None ):
4644
4733
"""
4645
4734
Subset rows or columns of dataframe according to labels in
0 commit comments