Skip to content

Commit 05431f6

Browse files
mikestok
authored and
José Valim
committed
keep tokenizer's column counts in sync for numbers with _ characters
To provide better feedback in Credo and similar tools, we should be able to map a token back to its original source. This makes sure that `_` characters in numbers are properly accounted for, so that column counts stay in sync after we've encountered something like 123_456_789. Signed-off-by: José Valim <[email protected]>
1 parent 9e6592c commit 05431f6

File tree

2 files changed

+45
-35
lines changed

2 files changed

+45
-35
lines changed

lib/elixir/src/elixir_tokenizer.erl

Lines changed: 40 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -135,15 +135,15 @@ tokenize([], EndLine, _Column, #elixir_tokenizer{terminators=[{Start, {StartLine
135135
% Base integers
136136

137137
tokenize([$0, $x, H | T], Line, Column, Scope, Tokens) when ?is_hex(H) ->
138-
{Rest, Number, Length} = tokenize_hex([H | T], []),
138+
{Rest, Number, Length} = tokenize_hex(T, [H], 1),
139139
tokenize(Rest, Line, Column + 2 + Length, Scope, [{number, {Line, Column, Column + 2 + Length}, Number} | Tokens]);
140140

141141
tokenize([$0, $b, H | T], Line, Column, Scope, Tokens) when ?is_bin(H) ->
142-
{Rest, Number, Length} = tokenize_bin([H | T], []),
142+
{Rest, Number, Length} = tokenize_bin(T, [H], 1),
143143
tokenize(Rest, Line, Column + 2 + Length, Scope, [{number, {Line, Column, Column + 2 + Length}, Number} | Tokens]);
144144

145145
tokenize([$0, $o, H | T], Line, Column, Scope, Tokens) when ?is_octal(H) ->
146-
{Rest, Number, Length} = tokenize_octal([H | T], []),
146+
{Rest, Number, Length} = tokenize_octal(T, [H], 1),
147147
tokenize(Rest, Line, Column + 2 + Length, Scope, [{number, {Line, Column, Column + 2 + Length}, Number} | Tokens]);
148148

149149
% Comments
@@ -413,8 +413,8 @@ tokenize([$. | T], Line, Column, Scope, Tokens) ->
413413

414414
% Integers and floats
415415

416-
tokenize([H | _] = String, Line, Column, Scope, Tokens) when ?is_digit(H) ->
417-
{Rest, Number, Length} = tokenize_number(String, [], false),
416+
tokenize([H | T], Line, Column, Scope, Tokens) when ?is_digit(H) ->
417+
{Rest, Number, Length} = tokenize_number(T, [H], 1, false),
418418
tokenize(Rest, Line, Column + Length, Scope, [{number, {Line, Column, Column + Length}, Number} | Tokens]);
419419

420420
% Identifiers (including aliases)
@@ -733,46 +733,55 @@ extract_heredoc_line(Marker, Rest, Buffer, _Counter) ->
733733
%% At this point, we are at least sure the first digit is a number.
734734

735735
%% Check if we have a point followed by a number;
736-
tokenize_number([$., H | T], Acc, false) when ?is_digit(H) ->
737-
tokenize_number(T, [H, $. | Acc], true);
736+
tokenize_number([$., H | T], Acc, Length, false) when ?is_digit(H) ->
737+
tokenize_number(T, [H, $. | Acc], Length + 2, true);
738738

739739
%% Check if we have an underscore followed by a number;
740-
tokenize_number([$_, H | T], Acc, Bool) when ?is_digit(H) ->
741-
tokenize_number(T, [H | Acc], Bool);
740+
tokenize_number([$_, H | T], Acc, Length, Bool) when ?is_digit(H) ->
741+
tokenize_number(T, [H | Acc], Length + 2, Bool);
742742

743743
%% Check if we have e- followed by numbers (valid only for floats);
744-
tokenize_number([E, S, H | T], Acc, true)
744+
tokenize_number([E, S, H | T], Acc, Length, true)
745745
when (E == $E) or (E == $e), ?is_digit(H), S == $+ orelse S == $- ->
746-
tokenize_number(T, [H, S, $e | Acc], true);
746+
tokenize_number(T, [H, S, $e | Acc], Length + 3, true);
747747

748748
%% Check if we have e followed by numbers (valid only for floats);
749-
tokenize_number([E, H | T], Acc, true)
749+
tokenize_number([E, H | T], Acc, Length, true)
750750
when (E == $E) or (E == $e), ?is_digit(H) ->
751-
tokenize_number(T, [H, $e | Acc], true);
751+
tokenize_number(T, [H, $e | Acc], Length + 2, true);
752752

753753
%% Finally just numbers.
754-
tokenize_number([H | T], Acc, Bool) when ?is_digit(H) ->
755-
tokenize_number(T, [H | Acc], Bool);
754+
tokenize_number([H | T], Acc, Length, Bool) when ?is_digit(H) ->
755+
tokenize_number(T, [H | Acc], Length + 1, Bool);
756756

757757
%% Cast to float...
758-
tokenize_number(Rest, Acc, true) ->
759-
{Rest, list_to_float(lists:reverse(Acc)), length(Acc)};
758+
tokenize_number(Rest, Acc, Length, true) ->
759+
{Rest, list_to_float(lists:reverse(Acc)), Length};
760760

761761
%% Or integer.
762-
tokenize_number(Rest, Acc, false) ->
763-
{Rest, list_to_integer(lists:reverse(Acc)), length(Acc)}.
764-
765-
tokenize_hex([H | T], Acc) when ?is_hex(H) -> tokenize_hex(T, [H | Acc]);
766-
tokenize_hex([$_, H | T], Acc) when ?is_hex(H) -> tokenize_hex(T, [H | Acc]);
767-
tokenize_hex(Rest, Acc) -> {Rest, list_to_integer(lists:reverse(Acc), 16), length(Acc)}.
768-
769-
tokenize_octal([H | T], Acc) when ?is_octal(H) -> tokenize_octal(T, [H | Acc]);
770-
tokenize_octal([$_, H | T], Acc) when ?is_octal(H) -> tokenize_octal(T, [H | Acc]);
771-
tokenize_octal(Rest, Acc) -> {Rest, list_to_integer(lists:reverse(Acc), 8), length(Acc)}.
772-
773-
tokenize_bin([H | T], Acc) when ?is_bin(H) -> tokenize_bin(T, [H | Acc]);
774-
tokenize_bin([$_, H | T], Acc) when ?is_bin(H) -> tokenize_bin(T, [H | Acc]);
775-
tokenize_bin(Rest, Acc) -> {Rest, list_to_integer(lists:reverse(Acc), 2), length(Acc)}.
762+
tokenize_number(Rest, Acc, Length, false) ->
763+
{Rest, list_to_integer(lists:reverse(Acc)), Length}.
764+
765+
tokenize_hex([H | T], Acc, Length) when ?is_hex(H) ->
766+
tokenize_hex(T, [H | Acc], Length + 1);
767+
tokenize_hex([$_, H | T], Acc, Length) when ?is_hex(H) ->
768+
tokenize_hex(T, [H | Acc], Length + 2);
769+
tokenize_hex(Rest, Acc, Length) ->
770+
{Rest, list_to_integer(lists:reverse(Acc), 16), Length}.
771+
772+
tokenize_octal([H | T], Acc, Length) when ?is_octal(H) ->
773+
tokenize_octal(T, [H | Acc], Length + 1);
774+
tokenize_octal([$_, H | T], Acc, Length) when ?is_octal(H) ->
775+
tokenize_octal(T, [H | Acc], Length + 2);
776+
tokenize_octal(Rest, Acc, Length) ->
777+
{Rest, list_to_integer(lists:reverse(Acc), 8), Length}.
778+
779+
tokenize_bin([H | T], Acc, Length) when ?is_bin(H) ->
780+
tokenize_bin(T, [H | Acc], Length + 1);
781+
tokenize_bin([$_, H | T], Acc, Length) when ?is_bin(H) ->
782+
tokenize_bin(T, [H | Acc], Length + 2);
783+
tokenize_bin(Rest, Acc, Length) ->
784+
{Rest, list_to_integer(lists:reverse(Acc), 2), Length}.
776785

777786
%% Comments
778787

lib/elixir/test/erlang/tokenizer_test.erl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,16 @@ op_kw_test() ->
2525
[{atom, {1, 1, 5}, foo}, {dual_op, {1, 5, 6}, '+'}, {atom, {1, 6, 10}, bar}] = tokenize(":foo+:bar").
2626

2727
scientific_test() ->
28-
[{number, {1, 1, 7}, 0.1}] = tokenize("1.0e-1").
28+
[{number, {1, 1, 7}, 0.1}] = tokenize("1.0e-1"),
29+
[{number, {1, 1, 16}, 1.2345678e-7}] = tokenize("1_234.567_8e-10").
2930

3031
hex_bin_octal_test() ->
3132
[{number, {1, 1, 5}, 255}] = tokenize("0xFF"),
32-
[{number, {1, 1, 5}, 255}] = tokenize("0xF_F"),
33+
[{number, {1, 1, 6}, 255}] = tokenize("0xF_F"),
3334
[{number, {1, 1, 5}, 63}] = tokenize("0o77"),
34-
[{number, {1, 1, 5}, 63}] = tokenize("0o7_7"),
35+
[{number, {1, 1, 6}, 63}] = tokenize("0o7_7"),
3536
[{number, {1, 1, 5}, 3}] = tokenize("0b11"),
36-
[{number, {1, 1, 5}, 3}] = tokenize("0b1_1").
37+
[{number, {1, 1, 6}, 3}] = tokenize("0b1_1").
3738

3839
unquoted_atom_test() ->
3940
[{atom, {1, 1, 3}, '+'}] = tokenize(":+"),

0 commit comments

Comments
 (0)