Skip to content

Commit ff68407

Browse files
authored
Add Regex.to_embed/2 (#14379)
to_embed(regex, strict) returns an embeddable representation of regex. For instance ~r/foo/i can be represented as ~r/(?i-msx:foo)/. If the option :strict is true (the default) then it will raise an ArgumentError if the regex was compiled with an option/modifier which cannot be represented as an embeddable pattern. If :strict is false then any unembeddable options will be silently ignored. This may be perfectly reasonable, for instance the wrapped pattern may be compiled with the same modifiers as the pattern, or reusing the pattern without the unembeddable modifiers may not change its semantics.
1 parent 1c05790 commit ff68407

File tree

1 file changed

+99
-0
lines changed

1 file changed

+99
-0
lines changed

lib/elixir/lib/regex.ex

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,82 @@ defmodule Regex do
412412
opts
413413
end
414414

415+
@doc """
416+
Returns the pattern as an embeddable string.
417+
418+
If the pattern was compiled with an option which cannot be represented
419+
as an embeddable modifier in the current version of PCRE and strict is true
420+
(the default) then an ArgumentError exception will be raised.
421+
422+
When the `:strict` option is false the pattern will be returned as though
423+
any offending options had not been used and the function will not raise any
424+
exceptions.
425+
426+
Embeddable modifiers/options are currently:
427+
428+
* 'i' - `:caseless`
429+
* 'm' - `:multiline`
430+
* 's' - `:dotall, {:newline, :anycrlf}`
431+
* 'x' - `:extended`
432+
433+
Unembeddable modifiers are:
434+
435+
* 'f' - `:firstline`
436+
* 'U' - `:ungreedy`
437+
* 'u' - `:unicode, :ucp`
438+
439+
Any other regex compilation option not listed here is considered unembeddable
440+
and will raise an exception unless the `:strict` option is false.
441+
442+
## Examples
443+
iex> Regex.to_embed(~r/foo/)
444+
"(?-imsx:foo)"
445+
446+
iex> Regex.to_embed(~r/^foo/m)
447+
"(?m-isx:^foo)"
448+
449+
iex> Regex.to_embed(~r/foo # comment/ix)
450+
"(?ix-ms:foo # comment\\n)"
451+
452+
iex> Regex.to_embed(~r/foo/iu)
453+
** (ArgumentError) regex compiled with options [:ucp, :unicode] which cannot be represented as an embedded pattern in this version of PCRE
454+
455+
iex> Regex.to_embed(~r/foo/imsxu, strict: false)
456+
"(?imsx:foo\\n)"
457+
458+
"""
459+
@doc since: "1.19.0"
@spec to_embed(t, strict: boolean()) :: String.t()
def to_embed(%Regex{source: source, opts: regex_opts}, embed_opts \\ []) do
  # Strict mode applies unless the caller passes a falsy `:strict` value.
  strict = Keyword.get(embed_opts, :strict, true)

  # Modifier characters for every compile option that has an inline form.
  # Options without one abort the call in strict mode and are dropped otherwise.
  enabled =
    case embeddable_modifiers(regex_opts) do
      {:ok, chars} ->
        chars

      {:error, chars, _rejected} when strict in [false, nil] ->
        chars

      {:error, _chars, rejected} ->
        raise ArgumentError,
              "regex compiled with options #{inspect(rejected)} which cannot be " <>
                "represented as an embedded pattern in this version of PCRE"
    end

  # Embeddable modifiers that are not enabled are listed after a "-";
  # the suffix is omitted entirely when all four are enabled.
  switched_off =
    case [?i, ?m, ?s, ?x] -- enabled do
      [] -> ""
      chars -> "-#{chars}"
    end

  # Sorted so the output does not depend on the order the options were stored in.
  flags = Enum.sort(enabled)

  # Extended patterns get a trailing newline before the closing parenthesis,
  # presumably so a trailing `# comment` cannot swallow the ")".
  trailer = if :extended in regex_opts, do: "\n", else: ""

  "(?#{flags}#{switched_off}:#{source}#{trailer})"
end
490+
415491
@doc """
416492
Returns a list of names in the regex.
417493
@@ -845,6 +921,29 @@ defmodule Regex do
845921

846922
# Helpers
847923

924+
# translate options to modifiers as required for embedding
925+
# Splits compile options into inline modifier characters and the options that
# have no inline form. Returns `{:ok, modifiers}` when every option could be
# translated and `{:error, modifiers, rejected}` otherwise. Both result lists
# are built by prepending, so they come out in reverse order of appearance.
defp embeddable_modifiers(list), do: embeddable_modifiers(list, [], [])

# End of input: the shape of the result depends on whether anything was rejected.
defp embeddable_modifiers([], found, []), do: {:ok, found}
defp embeddable_modifiers([], found, rejected), do: {:error, found, rejected}

# `s` is only recognised when `:dotall` is directly followed by
# `{:newline, :anycrlf}`; a lone `:dotall` falls through to the clause below
# and is rejected.
defp embeddable_modifiers([:dotall, {:newline, :anycrlf} | rest], found, rejected) do
  embeddable_modifiers(rest, [?s | found], rejected)
end

defp embeddable_modifiers([option | rest], found, rejected) do
  case option do
    :caseless -> embeddable_modifiers(rest, [?i | found], rejected)
    :extended -> embeddable_modifiers(rest, [?x | found], rejected)
    :multiline -> embeddable_modifiers(rest, [?m | found], rejected)
    _other -> embeddable_modifiers(rest, found, [option | rejected])
  end
end
944+
945+
# translate modifiers to options
946+
848947
defp translate_options(<<?s, t::binary>>, acc),
849948
do: translate_options(t, [:dotall, {:newline, :anycrlf} | acc])
850949

0 commit comments

Comments
 (0)