@@ -169,11 +169,34 @@ defmodule String do
169
169
when retrieving data from the external source. For example, a
170
170
driver that reads strings from a database will be the one
171
171
responsible to check the validity of the encoding.
172
+
173
+ ## Patterns
174
+
175
+ Many functions in this module work with patterns. For example,
176
+ String.split/2 can split a string into multiple strings given
177
+ a pattern. This pattern can be a string, a list of strings or
178
+ a compiled pattern:
179
+
180
+ iex> String.split("foo bar", " ")
181
+ ["foo", "bar"]
182
+
183
+ iex> String.split("foo bar!", [" ", "!"])
184
+ ["foo", "bar", ""]
185
+
186
+ iex> pattern = :binary.compile_pattern([" ", "!"])
187
+ iex> String.split("foo bar!", pattern)
188
+ ["foo", "bar", ""]
189
+
190
+ The compiled pattern is useful when the same match will
191
+ be done over and over again. Note though the compiled
192
+ pattern cannot be stored in a module attribute as the pattern
193
+ is generated at runtime and does not survive compile time.
172
194
"""
173
195
174
196
@ type t :: binary
175
197
@ type codepoint :: t
176
198
@ type grapheme :: t
199
+ @ type pattern :: t | [ t ] | :binary . cp
177
200
178
201
@ doc """
179
202
Checks if a string is printable considering it is encoded
@@ -285,66 +308,100 @@ defmodule String do
285
308
iex> String.split("abc", "", parts: 2)
286
309
["a", "bc"]
287
310
311
+ A precompiled pattern can also be given:
312
+
313
+ iex> pattern = :binary.compile_pattern([" ", ","])
314
+ iex> String.split("1,2 3,4", pattern)
315
+ ["1", "2", "3", "4"]
316
+
288
317
"""
289
- @ spec split ( t , t | [ t ] | Regex . t ) :: [ t ]
290
- @ spec split ( t , t | [ t ] | Regex . t , Keyword . t ) :: [ t ]
318
+ @ spec split ( t , pattern | Regex . t ) :: [ t ]
319
+ @ spec split ( t , pattern | Regex . t , Keyword . t ) :: [ t ]
291
320
def split(string, pattern, options \\ [])

# Regular expressions are handed to Regex.split/3 together with the
# caller's options.
def split(string, %Regex{} = regex, options) do
  Regex.split(regex, string, options)
end

# Fast path: no options and a non-empty pattern, so a single global
# :binary.split/3 call does all the work.
def split(string, pattern, []) when pattern != "" do
  :binary.split(string, pattern, [:global])
end

# General path: honours :parts and :trim, and also handles the empty
# string pattern, which maybe_compile_pattern/1 passes through untouched.
def split(string, pattern, options) do
  max_parts = Keyword.get(options, :parts, :infinity)
  trim_empty = Keyword.get(options, :trim, false)
  compiled = maybe_compile_pattern(pattern)
  split_each(string, compiled, trim_empty, parts_to_index(max_parts))
end
311
336
312
337
# Converts the :parts option into the countdown used by split_each/4:
# :infinity becomes 0, a positive integer is kept as is. Anything else
# does not match the guard and raises FunctionClauseError.
defp parts_to_index(parts)
     when parts == :infinity or (is_integer(parts) and parts > 0) do
  if parts == :infinity, do: 0, else: parts
end
314
339
315
- defp split_codepoints ( binary , 1 , _trim ) , do: [ binary ]
316
- defp split_codepoints ( << h :: utf8 , t :: binary >> , count , trim ) ,
317
- do: [ << h :: utf8 >> | split_codepoints ( t , count - 1 , trim ) ]
318
- defp split_codepoints ( << h , t :: binary >> , count , trim ) ,
319
- do: [ << h >> | split_codepoints ( t , count - 1 , trim ) ]
320
- defp split_codepoints ( << >> , _ , true ) , do: [ ]
321
- defp split_codepoints ( << >> , _ , false ) , do: [ "" ]
322
-
323
- defp split_parts ( "" , _pattern , _num , true ) , do: [ ]
324
- defp split_parts ( "" , _pattern , _num , _trim ) , do: [ "" ]
325
- defp split_parts ( string , _pattern , 1 , _trim ) , do: [ string ]
326
- defp split_parts ( string , pattern , num , trim ) do
327
- case :binary . split ( string , pattern ) do
328
- [ "" ] when trim ->
329
- [ ]
330
- [ head ] ->
331
- [ head ]
332
- [ head , tail ] ->
333
- if trim and head == "" do
334
- split_parts ( tail , pattern , num , trim )
335
- else
336
- [ head | split_parts ( tail , pattern , num - 1 , trim ) ]
337
- end
340
# Eagerly collects the pieces produced by do_splitter/3 into a list.
#
# `count` is the number of parts still allowed. When it reaches 1 the
# remaining string is emitted untouched as the last part. A `count` of 0
# (what parts_to_index/1 returns for :infinity) never reaches 1 while
# counting down, so the string is split until do_splitter/3 returns nil.

# With trimming enabled, a trailing empty part must not be emitted.
defp split_each("", _pattern, true, 1), do: []

# The is_binary/1 guard matters: do_splitter/3 hands back the :nomatch
# sentinel as "rest" once the input is exhausted, and without the guard it
# would be returned as the last part (e.g. ["abc", :nomatch]).
defp split_each(string, _pattern, _trim, 1) when is_binary(string), do: [string]

defp split_each(string, pattern, trim, count) do
  case do_splitter(string, pattern, trim) do
    {head, rest} -> [head | split_each(rest, pattern, trim, count - 1)]
    nil -> []
  end
end
340
347
348
@doc """
Splits a string on demand.

Returns an enumerable that splits the string lazily, piece by
piece, instead of splitting all data upfront.

Note splitter does not support regular expressions
(as it is often more efficient to have the regular
expression traverse the string at once than in
multiple passes).

## Options

  * :trim - when true, does not emit empty parts

## Examples

    iex> String.splitter("1,2 3,4", [" ", ","]) |> Enum.take(4)
    ["1", "2", "3", "4"]

"""
@spec splitter(t, pattern, Keyword.t) :: Enumerable.t
def splitter(string, pattern, options \\ []) do
  compiled = maybe_compile_pattern(pattern)
  trim_empty = Keyword.get(options, :trim, false)
  # Each unfold step yields one piece plus the remainder still to be split.
  Stream.unfold(string, fn rest -> do_splitter(rest, compiled, trim_empty) end)
end
369
+
370
# Single splitting step shared by split_each/4 and splitter/3.
#
# Returns `{piece, rest}` or `nil` when there is nothing left to emit.
# `rest` is either the remaining binary or the :nomatch sentinel, which
# marks that the previous piece was the last one.
defp do_splitter(:nomatch, _pattern, _trim), do: nil
defp do_splitter("", _pattern, true), do: nil
defp do_splitter("", _pattern, false), do: {"", :nomatch}

# An empty pattern delegates to next_grapheme/1 (defined elsewhere in
# this module).
defp do_splitter(bin, "", _trim), do: next_grapheme(bin)

defp do_splitter(bin, pattern, trim) do
  case :binary.match(bin, pattern) do
    # A match at the very start would produce an empty piece; when
    # trimming, drop the separator and try again on what follows.
    {0, skip} when trim ->
      <<_::binary-size(skip), rest::binary>> = bin
      do_splitter(rest, pattern, trim)

    {at, len} ->
      <<piece::binary-size(at), _::binary-size(len), rest::binary>> = bin
      {piece, rest}

    :nomatch ->
      {bin, :nomatch}
  end
end
390
+
391
# Compiles `pattern` with :binary.compile_pattern/1, except for the empty
# string, which is returned unchanged so callers can match on it.
defp maybe_compile_pattern(pattern) do
  case pattern do
    "" -> ""
    other -> :binary.compile_pattern(other)
  end
end
393
+
341
394
@ doc """
342
395
Splits a string into two at the specified offset. When the offset given is
343
396
negative, location is counted from the end of the string.
344
397
345
- The offset is capped to the length of the string.
398
+ The offset is capped to the length of the string. Returns a tuple with
399
+ two elements.
346
400
347
- Returns a tuple with two elements.
401
+ Note: keep in mind this function splits on graphemes and for such it
402
+ has to linearly traverse the string. If you want to split a string or
403
+ a binary based on the number of bytes, use `Kernel.binary_part/3`
404
+ instead.
348
405
349
406
## Examples
350
407
@@ -663,9 +720,7 @@ defmodule String do
663
720
"a[,,]b[,,]c"
664
721
665
722
"""
666
- @ spec replace ( t , t | Regex . t , t ) :: t
667
- @ spec replace ( t , t | Regex . t , t , Keyword . t ) :: t
668
-
723
+ @ spec replace ( t , pattern | Regex . t , t , Keyword . t ) :: t
669
724
def replace ( subject , pattern , replacement , options \\ [ ] ) when is_binary ( replacement ) do
670
725
if Regex . regex? ( pattern ) do
671
726
Regex . replace ( pattern , subject , replacement , global: options [ :global ] )
@@ -872,7 +927,6 @@ defmodule String do
872
927
do_chunk ( str , pred_fn . ( cp ) , pred_fn )
873
928
end
874
929
875
-
876
930
defp do_chunk ( str , flag , pred_fn ) , do: do_chunk ( str , [ ] , << >> , flag , pred_fn )
877
931
878
932
defp do_chunk ( << >> , acc , << >> , _ , _ ) , do: Enum . reverse ( acc )
@@ -1215,19 +1269,13 @@ defmodule String do
1215
1269
"""
1216
1270
@ spec starts_with? ( t , t | [ t ] ) :: boolean
1217
1271
1218
- def starts_with? ( string , prefixes ) when is_list ( prefixes ) do
1219
- Enum . any? ( prefixes , & do_starts_with ( string , & 1 ) )
1220
- end
1221
-
1222
- def starts_with? ( string , prefix ) do
1223
- do_starts_with ( string , prefix )
1224
- end
1225
-
1226
- defp do_starts_with ( string , "" ) when is_binary ( string ) do
1272
# An empty prefix is deprecated: warn on stderr (with a stacktrace so the
# caller can be located) and keep returning true for now.
def starts_with?(_string, "") do
  IO.puts(
    :stderr,
    "[deprecation] Calling String.starts_with?/2 with an empty string is deprecated and " <>
      "will fail in the future\n " <> Exception.format_stacktrace()
  )

  true
end

# :binary.match/2 rejects an empty list of patterns with an ArgumentError.
# With no candidate prefixes nothing can match, so answer false directly.
def starts_with?(_string, []), do: false

# `prefix` is a binary or a list of binaries; the string starts with one of
# them exactly when the first match reported by :binary.match/2 is at
# offset 0.
def starts_with?(string, prefix) when is_list(prefix) or is_binary(prefix) do
  Kernel.match?({0, _}, :binary.match(string, prefix))
end
1233
1281
@@ -1249,6 +1297,12 @@ defmodule String do
1249
1297
"""
1250
1298
@ spec ends_with? ( t , t | [ t ] ) :: boolean
1251
1299
1300
+ def ends_with? ( _string , "" ) do
1301
+ IO . puts :stderr , "[deprecation] Calling String.ends_with?/2 with an empty string is deprecated and " <>
1302
+ "will fail in the future\n " <> Exception . format_stacktrace ( )
1303
+ true
1304
+ end
1305
+
1252
1306
def ends_with? ( string , suffixes ) when is_list ( suffixes ) do
1253
1307
Enum . any? ( suffixes , & do_ends_with ( string , & 1 ) )
1254
1308
end
@@ -1257,10 +1311,6 @@ defmodule String do
1257
1311
do_ends_with ( string , suffix )
1258
1312
end
1259
1313
1260
- defp do_ends_with ( string , "" ) when is_binary ( string ) do
1261
- true
1262
- end
1263
-
1264
1314
defp do_ends_with ( string , suffix ) when is_binary ( suffix ) do
1265
1315
string_size = byte_size ( string )
1266
1316
suffix_size = byte_size ( suffix )
@@ -1301,23 +1351,23 @@ defmodule String do
1301
1351
iex> String.contains? "elixir of life", ["death", "mercury"]
1302
1352
false
1303
1353
1304
- """
1305
- @ spec contains? ( t , t | [ t ] ) :: boolean
1354
+ The argument can also be a precompiled pattern:
1306
1355
1307
- def contains? ( string , contents ) when is_list ( contents ) do
1308
- Enum . any? ( contents , & do_contains ( string , & 1 ) )
1309
- end
1356
+ iex> pattern = :binary.compile_pattern(["life", "death"])
1357
+ iex> String.contains? "elixir of life", pattern
1358
+ true
1310
1359
1311
- def contains? ( string , content ) do
1312
- do_contains ( string , content )
1313
- end
1360
+ """
1361
+ @ spec contains? ( t , pattern ) :: boolean
1314
1362
1315
- defp do_contains ( string , "" ) when is_binary ( string ) do
1363
# An empty search string is deprecated: warn on stderr (with a stacktrace
# so the caller can be located) and keep returning true for now.
def contains?(_string, "") do
  IO.puts(
    :stderr,
    "[deprecation] Calling String.contains?/2 with an empty string is deprecated and " <>
      "will fail in the future\n " <> Exception.format_stacktrace()
  )

  true
end

# :binary.match/2 rejects an empty list of patterns with an ArgumentError.
# With nothing to look for, nothing is contained, so answer false directly.
def contains?(_string, []), do: false

# `contents` may be a binary, a list of binaries or a pattern precompiled
# with :binary.compile_pattern/1; all are passed straight to :binary.match/2.
def contains?(string, contents) do
  :binary.match(string, contents) != :nomatch
end
1322
1372
1323
1373
@ doc """
0 commit comments