Skip to content

Commit 0793fc9

Browse files
authored
Inspect special purpose chars using their unicode representation (#13676)
This reduces confusion when working with zero-width characters or alternative spaces. Relates to #13673
1 parent e8ea6a5 commit 0793fc9

File tree

2 files changed

+30
-2
lines changed

2 files changed

+30
-2
lines changed

lib/elixir/lib/code/identifier.ex

+28-2
Original file line number | Diff line number | Diff line change
@@ -105,8 +105,34 @@ defmodule Code.Identifier do
105105

106106
# NUL (code point 0) is emitted as the two characters `\` and `0`.
defp escape_char(0), do: [?\\, ?0]
107107

108-
@escaped_bom :binary.bin_to_list("\\uFEFF")
109-
defp escape_char(65279), do: @escaped_bom
108+
defp escape_char(char)
109+
# Some characters that are confusing (zero-width / alternative spaces) are displayed
110+
# using their unicode representation:
111+
# https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Special-purpose_characters
112+
113+
# BOM
114+
when char == 0xFEFF
115+
# Mathematical invisibles
116+
when char in 0x2061..0x2064
117+
# Bidirectional neutral
118+
when char in [0x061C, 0x200E, 0x200F]
119+
# Bidirectional general (source of vulnerabilities)
120+
when char in 0x202A..0x202E
121+
when char in 0x2066..0x2069
122+
# Interlinear annotations
123+
when char in 0xFFF9..0xFFFC
124+
# Zero-width joiners and non-joiners
125+
when char in [0x200C, 0x200D, 0x034F]
126+
# Non-break space / zero-width space
127+
when char in [0x00A0, 0x200B, 0x2060]
128+
# Line/paragraph separators
129+
when char in [0x2028, 0x2029]
130+
# Spaces
131+
when char in 0x2000..0x200A
132+
when char == 0x205F do
133+
# Every code point accepted by the guards above is <= 0xFFFF, so it fits in
# 16 bits; split it into four 4-bit nibbles, one per hexadecimal digit.
<<a::4, b::4, c::4, d::4>> = <<char::16>>
134+
# Build the escape as a charlist: backslash, `u`, then the four digits.
# NOTE(review): to_hex/1 is defined outside this hunk; presumably it maps
# 0..15 to the matching hex digit character - confirm against its definition.
[?\\, ?u, to_hex(a), to_hex(b), to_hex(c), to_hex(d)]
135+
end
110136

111137
defp escape_char(char)
112138
when char in 0x20..0x7E

lib/elixir/test/elixir/inspect_test.exs

+2
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,8 @@ defmodule Inspect.BitStringTest do
131131
assert inspect(" ゆんゆん") == "\" ゆんゆん\""
132132
# BOM
133133
assert inspect("\uFEFFhello world") == "\"\\uFEFFhello world\""
134+
# Invisible characters
135+
assert inspect("\u2063") == "\"\\u2063\""
134136
end
135137

136138
test "infer" do

0 commit comments

Comments
 (0)