Skip to content

Commit 496cb2c

Browse files
authored
Add support for sigils containing integers (#13448)
1 parent 26d18e5 commit 496cb2c

File tree

4 files changed

+41
-12
lines changed

4 files changed

+41
-12
lines changed

lib/elixir/pages/getting-started/sigils.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,6 @@ iex> ~i(42)n
235235
-42
236236
```
237237
238-
Custom sigils may be either a single lowercase character or several uppercase characters.
238+
Custom sigils may be either a single lowercase character, or an uppercase character optionally followed by more uppercase characters and digits.
239239
240240
Sigils can also be used to do compile-time work with the help of macros. For example, regular expressions in Elixir are compiled into an efficient representation during compilation of the source code, therefore skipping this step at runtime. If you're interested in the subject, you can learn more about macros and check out how sigils are implemented in the `Kernel` module (where the `sigil_*` functions are defined).

lib/elixir/src/elixir_import.erl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,10 @@ is_sigil({Name, 2}) ->
157157
case Letters of
158158
[L] when L >= $a, L =< $z -> true;
159159
[] -> false;
160-
Letters -> lists:all(fun(L) -> L >= $A andalso L =< $Z end, Letters)
160+
[H|T] when H >= $A, H =< $Z ->
161+
lists:all(fun(L) -> (L >= $0 andalso L =< $9)
162+
orelse (L>= $A andalso L =< $Z)
163+
end, T)
161164
end;
162165
_ ->
163166
false

lib/elixir/src/elixir_tokenizer.erl

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1559,20 +1559,33 @@ tokenize_sigil([$~ | T], Line, Column, Scope, Tokens) ->
15591559
end.
15601560

15611561
% A one-letter sigil is ok as either upcase or downcase.
1562-
tokenize_sigil_name([S | T], [], Line, Column, Scope, Tokens) when ?is_upcase(S) orelse ?is_downcase(S) ->
1563-
tokenize_sigil_name(T, [S], Line, Column + 1, Scope, Tokens);
1562+
tokenize_sigil_name([S | T], [], Line, Column, Scope, Tokens) when ?is_downcase(S) ->
1563+
tokenize_lower_sigil_name(T, [S], Line, Column + 1, Scope, Tokens);
1564+
tokenize_sigil_name([S | T], [], Line, Column, Scope, Tokens) when ?is_upcase(S) ->
1565+
tokenize_upper_sigil_name(T, [S], Line, Column + 1, Scope, Tokens).
1566+
1567+
tokenize_lower_sigil_name([S | _T] = Original, [_ | _] = NameAcc, _Line, _Column, _Scope, _Tokens) when ?is_downcase(S) ->
1568+
SigilName = lists:reverse(NameAcc) ++ Original,
1569+
{error, sigil_name_error(), [$~] ++ SigilName};
1570+
tokenize_lower_sigil_name(T, NameAcc, Line, Column, Scope, Tokens) ->
1571+
{ok, lists:reverse(NameAcc), T, Line, Column, Scope, Tokens}.
1572+
15641573
% If we have an uppercase letter, we keep tokenizing the name.
1565-
tokenize_sigil_name([S | T], NameAcc, Line, Column, Scope, Tokens) when ?is_upcase(S) ->
1566-
tokenize_sigil_name(T, [S | NameAcc], Line, Column + 1, Scope, Tokens);
1574+
% A digit is allowed but an uppercase letter or digit must precede it.
1575+
tokenize_upper_sigil_name([S | T], NameAcc, Line, Column, Scope, Tokens) when ?is_upcase(S); ?is_digit(S) ->
1576+
tokenize_upper_sigil_name(T, [S | NameAcc], Line, Column + 1, Scope, Tokens);
15671577
% With a lowercase letter and a non-empty NameAcc we return an error.
1568-
tokenize_sigil_name([S | _T] = Original, [_ | _] = NameAcc, _Line, _Column, _Scope, _Tokens) when ?is_downcase(S) ->
1569-
Message = "invalid sigil name, it should be either a one-letter lowercase letter or a" ++
1570-
" sequence of uppercase letters only, got: ",
1571-
{error, Message, [$~] ++ lists:reverse(NameAcc) ++ Original};
1578+
tokenize_upper_sigil_name([S | _T] = Original, [_ | _] = NameAcc, _Line, _Column, _Scope, _Tokens) when ?is_downcase(S) ->
1579+
SigilName = lists:reverse(NameAcc) ++ Original,
1580+
{error, sigil_name_error(), [$~] ++ SigilName};
15721581
% We finished the letters and digits, so the name is over.
1573-
tokenize_sigil_name(T, NameAcc, Line, Column, Scope, Tokens) ->
1582+
tokenize_upper_sigil_name(T, NameAcc, Line, Column, Scope, Tokens) ->
15741583
{ok, lists:reverse(NameAcc), T, Line, Column, Scope, Tokens}.
15751584

1585+
sigil_name_error() ->
1586+
"invalid sigil name, it should be either a one-letter lowercase letter or an " ++
1587+
"uppercase letter optionally followed by uppercase letters and digits, got: ".
1588+
15761589
tokenize_sigil_contents([H, H, H | T] = Original, [S | _] = SigilName, Line, Column, Scope, Tokens)
15771590
when ?is_quote(H) ->
15781591
case extract_heredoc_with_interpolation(Line, Column, Scope, ?is_downcase(S), T, H) of

lib/elixir/test/elixir/kernel/parser_test.exs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,13 +165,26 @@ defmodule Kernel.ParserTest do
165165
meta = [delimiter: "\"\"\"", line: 1]
166166
args = {:sigil_MAT, meta, [{:<<>>, [indentation: 0, line: 1], ["1,2,3\n"]}, []]}
167167
assert string_to_quoted.("~MAT\"\"\"\n1,2,3\n\"\"\"") == args
168+
169+
args = {:sigil_FOO1, meta, [{:<<>>, [indentation: 0, line: 1], ["1,2,3\n"]}, []]}
170+
assert string_to_quoted.("~FOO1\"\"\"\n1,2,3\n\"\"\"") == args
171+
172+
args = {:sigil_BAR321, meta, [{:<<>>, [indentation: 0, line: 1], ["1,2,3\n"]}, []]}
173+
assert string_to_quoted.("~BAR321\"\"\"\n1,2,3\n\"\"\"") == args
174+
175+
args = {:sigil_I18N, meta, [{:<<>>, [indentation: 0, line: 1], ["1,2,3\n"]}, []]}
176+
assert string_to_quoted.("~I18N\"\"\"\n1,2,3\n\"\"\"") == args
168177
end
169178

170179
test "invalid multi-letter sigils" do
171180
msg =
172-
~r/invalid sigil name, it should be either a one-letter lowercase letter or a sequence of uppercase letters only/
181+
~r/invalid sigil name, it should be either a one-letter lowercase letter or an uppercase letter optionally followed by uppercase letters and digits/
173182

174183
assert_syntax_error(["nofile:1:1:", msg], "~Regex/foo/")
184+
185+
assert_syntax_error(["nofile:1:1:", msg], "~FOo1{bar]")
186+
187+
assert_syntax_error(["nofile:1:1:", msg], "~foo1{bar]")
175188
end
176189

177190
test "sigil newlines" do

0 commit comments

Comments
 (0)