@@ -48,12 +48,6 @@ defmodule Kernel.WarningTest do
48
48
end )
49
49
end
50
50
51
- defp capture_quoted ( source ) do
52
- capture_err ( fn ->
53
- Code . string_to_quoted! ( source , columns: true )
54
- end )
55
- end
56
-
57
51
defp capture_compile ( source ) do
58
52
capture_err ( fn ->
59
53
quoted = Code . string_to_quoted! ( source , columns: true )
@@ -93,13 +87,6 @@ defmodule Kernel.WarningTest do
93
87
end
94
88
95
89
describe "unicode identifier security" do
96
- test "prevents Restricted codepoints in identifiers" do
97
- exception = assert_raise SyntaxError , fn -> Code . string_to_quoted! ( "_shibㅤ = 1" ) end
98
-
99
- assert Exception . message ( exception ) =~
100
- "unexpected token: \" ㅤ\" (column 6, code point U+3164)"
101
- end
102
-
103
90
test "warns on confusables" do
104
91
assert_warn_quoted (
105
92
[ "nofile:1:6" , "confusable identifier: 'a' looks like 'а' on line 1" ] ,
@@ -161,105 +148,6 @@ defmodule Kernel.WarningTest do
161
148
] ,
162
149
"a_א1 or a_1א"
163
150
)
164
-
165
- # test that the implementation of String.Tokenizer.Security.unbidify/1 agrees
166
- # w/Unicode Bidi Algo (UAX9) for these (identifier-specific, no-bracket) examples
167
- #
168
- # you can create new examples with: https://util.unicode.org/UnicodeJsps/bidic.jsp?s=foo_%D9%84%D8%A7%D9%85%D8%AF%D8%A7_baz&b=0&u=140&d=2
169
- # inspired by (none of these are directly usable for our idents): https://www.unicode.org/Public/UCD/latest/ucd/BidiCharacterTest.txt
170
- #
171
- # there's a spurious ;A; after the identifier, because the semicolon is dir-neutral, and
172
- # deleting it makes these examples hard to read in many/most editors!
173
- """
174
- foo;A;0066 006F 006F;0 1 2
175
- _foo_ ;A;005F 0066 006F 006F 005F;0 1 2 3 4
176
- __foo__ ;A;005F 005F 0066 006F 006F 005F 005F;0 1 2 3 4 5 6
177
- لامدا_foo ;A;0644 0627 0645 062F 0627 005F 0066 006F 006F;4 3 2 1 0 5 6 7 8
178
- foo_لامدا_baz ;A;0066 006F 006F 005F 0644 0627 0645 062F 0627 005F 0062 0061 007A;0 1 2 3 8 7 6 5 4 9 10 11 12
179
- foo_لامدا ;A;0066 006F 006F 005F 0644 0627 0645 062F 0627;0 1 2 3 8 7 6 5 4
180
- foo_لامدا1 ;A;0066 006F 006F 005F 0644 0627 0645 062F 0627 0031;0 1 2 3 9 8 7 6 5 4
181
- foo_لامدا_حدد ;A;0066 006F 006F 005F 0644 0627 0645 062F 0627 005F 062D 062F 062F;0 1 2 3 12 11 10 9 8 7 6 5 4
182
- foo_لامدا_حدد1 ;A;0066 006F 006F 005F 0644 0627 0645 062F 0627 005F 062D 062F 062F 0031;0 1 2 3 13 12 11 10 9 8 7 6 5 4
183
- foo_لامدا_حدد1_bar ;A; 0066 006F 006F 005F 0644 0627 0645 062F 0627 005F 062D 062F 062F 0031 005F 0062 0061 0072;0 1 2 3 13 12 11 10 9 8 7 6 5 4 14 15 16 17
184
- foo_لامدا_حدد1_bar1 ;A;0066 006F 006F 005F 0644 0627 0645 062F 0627 005F 062D 062F 062F 0031 005F 0062 0061 0072 0031;0 1 2 3 13 12 11 10 9 8 7 6 5 4 14 15 16 17 18
185
- """
186
- |> String . split ( "\n " , trim: true )
187
- |> Enum . map ( & String . split ( & 1 , ";" , trim: true ) )
188
- |> Enum . each ( fn
189
- [ ident , _ , bytes , indices | _rest ] ->
190
- bytes = String . split ( bytes , " " , trim: true ) |> Enum . map ( & String . to_integer ( & 1 , 16 ) )
191
- indices = String . split ( indices , " " , trim: true ) |> Enum . map ( & String . to_integer / 1 )
192
- display_ordered = for i <- indices , do: Enum . at ( bytes , i )
193
- unbidified = String.Tokenizer.Security . unbidify ( bytes )
194
-
195
- assert ( display_ordered == unbidified , """
196
- Failing String.Tokenizer.Security.unbidify/1 case for: '#{ ident } '
197
- bytes : #{ bytes |> Enum . map ( & Integer . to_string ( & 1 , 16 ) ) |> Enum . join ( " " ) }
198
- byte order : #{ bytes |> Enum . intersperse ( 32 ) }
199
- uax9 order : #{ display_ordered |> Enum . intersperse ( 32 ) }
200
- uax9 indices : #{ indices |> Enum . join ( " " ) }
201
- unbidify/1 : #{ unbidified |> Enum . intersperse ( 32 ) }
202
- """ )
203
- end )
204
- end
205
-
206
- test "prevents unsafe script mixing in identifiers" do
207
- exception =
208
- assert_raise SyntaxError , fn ->
209
- Code . string_to_quoted! ( "if аdmin_, do: :ok, else: :err" )
210
- end
211
-
212
- assert Exception . message ( exception ) =~ "nofile:1:9:"
213
- assert Exception . message ( exception ) =~ "invalid mixed-script identifier found: аdmin"
214
-
215
- for s <- [
216
- "\\ u0430 а {Cyrillic}" ,
217
- "\\ u0064 d {Latin}" ,
218
- "\\ u006D m {Latin}" ,
219
- "\\ u0069 i {Latin}" ,
220
- "\\ u006E n {Latin}" ,
221
- "\\ u005F _"
222
- ] do
223
- assert Exception . message ( exception ) =~ s
224
- end
225
-
226
- # includes suggestion about what to change
227
- assert Exception . message ( exception ) =~ """
228
- Hint: You could write the above in a similar way that is accepted by Elixir:
229
- """
230
-
231
- assert Exception . message ( exception ) =~ """
232
- "admin_" (code points 0x00061 0x00064 0x0006D 0x00069 0x0006E 0x0005F)
233
- """
234
-
235
- # a is in cyrillic
236
- assert_raise SyntaxError , ~r/ mixed/ , fn -> Code . string_to_quoted! ( "[аdmin: 1]" ) end
237
- assert_raise SyntaxError , ~r/ mixed/ , fn -> Code . string_to_quoted! ( "[{:аdmin, 1}]" ) end
238
- assert_raise SyntaxError , ~r/ mixed/ , fn -> Code . string_to_quoted! ( "quote do: аdmin(1)" ) end
239
-
240
- # c is Latin
241
- assert_raise SyntaxError , ~r/ mixed/ , fn -> Code . string_to_quoted! ( "http_cервер = 1" ) end
242
-
243
- # T is in cyrillic
244
- assert_raise SyntaxError , ~r/ mixed/ , fn -> Code . string_to_quoted! ( "[Тシャツ: 1]" ) end
245
- end
246
-
247
- test "allows legitimate script mixing" do
248
- # writing systems that legitimately mix multiple scripts, and Common chars like _
249
- assert capture_eval ( "幻ㄒㄧㄤ = 1" ) == ""
250
- assert capture_eval ( "幻ㄒㄧㄤ1 = 1" ) == ""
251
- assert capture_eval ( "__सवव_1? = 1" ) == ""
252
-
253
- # uts39 5.2 allowed 'highly restrictive' script mixing, like 't-shirt' in Jpan:
254
- assert capture_quoted ( ":Tシャツ" ) == ""
255
-
256
- # elixir's normalizations combine scriptsets of the 'from' and 'to' characters,
257
- # ex: {Common} MICRO => {Greek} MU == {Common, Greek}; Common intersects w/all
258
- assert capture_quoted ( "μs" ) == ""
259
-
260
- # allows mixed scripts if the chunks are all single-script or highly restrictive
261
- assert capture_eval ( "http_сервер = 1" ) == ""
262
- assert capture_eval ( "сервер_http = 1" ) == ""
263
151
end
264
152
end
265
153
0 commit comments