@@ -659,11 +659,11 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
659
659
Split strings around given separator/delimiter.
660
660
661
661
Splits the string in the Series/Index from the %(side)s,
662
- at the specified delimiter string. Equivalent to :meth:`str.%(method)s`.
662
+ at the specified delimiter string.
663
663
664
664
Parameters
665
665
----------
666
- pat : str, optional
666
+ pat : str or compiled regex, optional
667
667
String or regular expression to split on.
668
668
If not specified, split on whitespace.
669
669
n : int, default -1 (all)
@@ -672,14 +672,30 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
672
672
expand : bool, default False
673
673
Expand the split strings into separate columns.
674
674
675
- * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
676
- * If ``False``, return Series/Index, containing lists of strings.
675
+ - If ``True``, return DataFrame/MultiIndex expanding dimensionality.
676
+ - If ``False``, return Series/Index, containing lists of strings.
677
+
678
+ regex : bool, default None
679
+ Determines if the passed-in pattern is a regular expression:
680
+
681
+ - If ``True``, assumes the passed-in pattern is a regular expression.
682
+ - If ``False``, treats the pattern as a literal string.
683
+ - If ``None`` and `pat` length is 1, treats `pat` as a literal string.
684
+ - If ``None`` and `pat` length is not 1, treats `pat` as a regular expression.
685
+ - Cannot be set to ``False`` if `pat` is a compiled regex.
686
+
687
+ .. versionadded:: 1.4.0
677
688
678
689
Returns
679
690
-------
680
691
Series, Index, DataFrame or MultiIndex
681
692
Type matches caller unless ``expand=True`` (see Notes).
682
693
694
+ Raises
695
+ ------
696
+ ValueError
697
+ * If `regex` is False and `pat` is a compiled regex.
698
+
683
699
See Also
684
700
--------
685
701
Series.str.split : Split strings around given separator/delimiter.
@@ -702,6 +718,9 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
702
718
If using ``expand=True``, Series and Index callers return DataFrame and
703
719
MultiIndex objects, respectively.
704
720
721
+ Using ``regex=False`` with a compiled regex as `pat` raises
722
+ a ``ValueError``.
723
+
705
724
Examples
706
725
--------
707
726
>>> s = pd.Series(
@@ -776,22 +795,63 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
776
795
1 https://docs.python.org/3/tutorial index.html
777
796
2 NaN NaN
778
797
779
- Remember to escape special characters when explicitly using regular
780
- expressions.
798
+ Remember to escape special characters when explicitly using regular expressions.
781
799
782
- >>> s = pd.Series(["1+1=2"])
783
- >>> s
784
- 0 1+1=2
785
- dtype: object
786
- >>> s.str.split(r"\+|=", expand=True)
787
- 0 1 2
788
- 0 1 1 2
800
+ >>> s = pd.Series(["foo and bar plus baz"])
801
+ >>> s.str.split(r"and|plus", expand=True)
802
+ 0 1 2
803
+ 0 foo bar baz
804
+
805
+ Regular expressions can be used to handle URLs or file names.
806
+ When `pat` is a string and ``regex=None`` (the default), the given `pat` is compiled
807
+ as a regex only if ``len(pat) != 1``.
808
+
809
+ >>> s = pd.Series(['foojpgbar.jpg'])
810
+ >>> s.str.split(r".", expand=True)
811
+ 0 1
812
+ 0 foojpgbar jpg
813
+
814
+ >>> s.str.split(r"\.jpg", expand=True)
815
+ 0 1
816
+ 0 foojpgbar
817
+
818
+ When ``regex=True``, `pat` is interpreted as a regex:
819
+
820
+ >>> s.str.split(r"\.jpg", regex=True, expand=True)
821
+ 0 1
822
+ 0 foojpgbar
823
+
824
+ A compiled regex can be passed as `pat`:
825
+
826
+ >>> import re
827
+ >>> s.str.split(re.compile(r"\.jpg"), expand=True)
828
+ 0 1
829
+ 0 foojpgbar
830
+
831
+ When ``regex=False``, `pat` is interpreted as the string itself:
832
+
833
+ >>> s.str.split(r"\.jpg", regex=False, expand=True)
834
+ 0
835
+ 0 foojpgbar.jpg
789
836
"""
790
837
791
838
@Appender(_shared_docs["str_split"] % {"side": "beginning", "method": "split"})
@forbid_nonstring_types(["bytes"])
def split(
    self,
    pat: str | re.Pattern | None = None,
    n=-1,
    expand=False,
    *,
    regex: bool | None = None,
):
    # NOTE(review): no docstring is written here on purpose -- the decorator
    # above is applied with the shared "str_split" template (side="beginning"),
    # which presumably supplies the user-facing documentation; confirm before
    # adding one.
    #
    # pat    : literal string, compiled regex, or None.
    # n      : forwarded unchanged to the array-level ``_str_split``.
    # expand : forwarded to ``_str_split`` and also decides how the result
    #          is wrapped (``returns_string=expand``).
    # regex  : keyword-only; True / False / None, where None leaves the
    #          decision to the array-level implementation.
    #
    # Raises ValueError when a compiled regex is combined with regex=False.
    pat_is_compiled = is_re(pat)
    if pat_is_compiled:
        if regex is False:
            # A compiled pattern cannot be treated as a literal string.
            raise ValueError(
                "Cannot use a compiled regex as replacement pattern with regex=False"
            )
        # A compiled pattern is always applied as a regular expression,
        # whatever ``regex`` was (True or None) on entry.
        regex = True

    split_values = self._data.array._str_split(pat, n, expand, regex)
    return self._wrap_result(split_values, returns_string=expand, expand=expand)
796
856
797
857
@Appender (_shared_docs ["str_split" ] % {"side" : "end" , "method" : "rsplit" })
0 commit comments