@@ -1095,24 +1095,88 @@ def str_pad(arr, width, side='left', fillchar=' '):
1095
1095
1096
1096
def str_split (arr , pat = None , n = None ):
1097
1097
"""
1098
- Split each string (a la re.split) in the Series/Index by given
1099
- pattern, propagating NA values. Equivalent to :meth:`str.split`.
1098
+ Split strings around given separator/delimiter.
1099
+
1100
+ Split each string in the caller's values by given
1101
+ pattern, propagating NaN values. Equivalent to :meth:`str.split`.
1100
1102
1101
1103
Parameters
1102
1104
----------
1103
- pat : string, default None
1104
- String or regular expression to split on. If None, splits on whitespace
1105
+ pat : str, optional
1106
+ String or regular expression to split on.
1107
+ If not specified, split on whitespace.
1105
1108
n : int, default -1 (all)
1106
- None, 0 and -1 will be interpreted as return all splits
1109
+ Limit number of splits in output.
1110
+ ``None``, 0 and -1 are all interpreted as "return all splits".
1107
1111
expand : bool, default False
1108
- * If True, return DataFrame/MultiIndex expanding dimensionality.
1109
- * If False, return Series/Index.
1112
+ Expand the split strings into separate columns.
1110
1113
1111
- return_type : deprecated, use `expand`
1114
+ * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
1115
+ * If ``False``, return Series/Index, containing lists of strings.
1112
1116
1113
1117
Returns
1114
1118
-------
1115
1119
split : Series/Index or DataFrame/MultiIndex of objects
1120
+ Type matches caller unless ``expand=True`` (return type is DataFrame or
1121
+ MultiIndex)
1122
+
1123
+ Notes
1124
+ -----
1125
+ The handling of the `n` keyword depends on the number of found splits:
1126
+
1127
+ - If found splits > `n`, make first `n` splits only
1128
+ - If found splits <= `n`, make all splits
1129
+ - If for a certain row the number of found splits < `n`,
1130
+ append `None` for padding up to `n` if ``expand=True``
1131
+
1132
+ Examples
1133
+ --------
1134
+ >>> s = pd.Series(["this is good text", "but this is even better"])
1135
+
1136
+ By default, split will return an object of the same size
1137
+ having lists containing the split elements:
1138
+
1139
+ >>> s.str.split()
1140
+ 0 [this, is, good, text]
1141
+ 1 [but, this, is, even, better]
1142
+ dtype: object
1143
+ >>> s.str.split("random")
1144
+ 0 [this is good text]
1145
+ 1 [but this is even better]
1146
+ dtype: object
1147
+
1148
+ When using ``expand=True``, the split elements will
1149
+ expand out into separate columns.
1150
+
1151
+ >>> s.str.split(expand=True)
1152
+ 0 1 2 3 4
1153
+ 0 this is good text None
1154
+ 1 but this is even better
1155
+ >>> s.str.split(" is ", expand=True)
1156
+ 0 1
1157
+ 0 this good text
1158
+ 1 but this even better
1159
+
1160
+ Parameter `n` can be used to limit the number of splits in the output.
1161
+
1162
+ >>> s.str.split("is", n=1)
1163
+ 0 [th, is good text]
1164
+ 1 [but th, is even better]
1165
+ dtype: object
1166
+ >>> s.str.split("is", n=1, expand=True)
1167
+ 0 1
1168
+ 0 th is good text
1169
+ 1 but th is even better
1170
+
1171
+ If NaN is present, it is propagated throughout the columns
1172
+ during the split.
1173
+
1174
+ >>> s = pd.Series(["this is good text", "but this is even better", np.nan])
1175
+ >>> s.str.split(n=3, expand=True)
1176
+ 0 1 2 3
1177
+ 0 this is good text
1178
+ 1 but this is even better
1179
+ 2 NaN NaN NaN NaN
1116
1180
"""
1117
1181
if pat is None :
1118
1182
if n is None or n == 0 :
0 commit comments