Skip to content

Commit f01dc17

Browse files
committed
Optimize reversing logic
Rule #1: Enum.reverse([1, 2, 3]) ++ [4, 5, 6] is equivalent to, but slower than, Enum.reverse([1, 2, 3], [4, 5, 6]). Rule #2: Enum.reverse([1, 2, 3] ++ [4, 5, 6]) is equivalent to, but slower than, Enum.reverse([4, 5, 6], Enum.reverse([1, 2, 3])). Rule #3: Enum.reverse(Enum.reverse([1, 2, 3])) is the same as [1, 2, 3].
1 parent 9924aff commit f01dc17

File tree

2 files changed

+24
-25
lines changed

2 files changed

+24
-25
lines changed

lib/elixir/unicode/security.ex

+24-24
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ defmodule String.Tokenizer.Security do
110110
|> :unicode.characters_to_nfd_list()
111111
end
112112

113-
# unicode 15 adds bidiSkeleton because, w/RTL codepoints, idents that
113+
# Unicode 15 adds bidiSkeleton because, w/RTL codepoints, idents that
114114
# aren't confusable LTR *are* confusable in most places human review
115115
# occurs (editors/browsers, thanks to bidi algo, UAX9).
116116
#
@@ -122,11 +122,11 @@ defmodule String.Tokenizer.Security do
122122
# chars like _ or 0..9 can mix w/RTL chars).
123123
def bidi_skeleton(s) do
124124
# UTS39-28 4:
125-
# 'Bidirectional confusability is costlier to check than
126-
# confusability, as [unicode bidi algo] must be applied.
127-
# [...] a fast path can be used: [...] if X has no characters
128-
# w/bidi classes R or AL, bidiSkeleton(X) = skeleton(X)
129125
#
126+
# Bidirectional confusability is costlier to check than
127+
# confusability, as [unicode bidi algo] must be applied.
128+
# [...] a fast path can be used: [...] if X has no characters
129+
# w/bidi classes R or AL, bidiSkeleton(X) = skeleton(X)
130130
if match?([_, _ | _], s) and any_rtl?(s) do
131131
unbidify(s) |> Enum.map(&confusable_prototype/1)
132132
else
@@ -136,7 +136,7 @@ defmodule String.Tokenizer.Security do
136136

137137
import String.Tokenizer, only: [dir: 1]
138138

139-
defp any_rtl?(s), do: Enum.any?(s, &(:rtl == dir(&1)))
139+
defp any_rtl?(s), do: Enum.any?(s, &(:rtl == String.Tokenizer.dir(&1)))
140140

141141
defp dir_compare(a, b) do
142142
"""
@@ -150,7 +150,7 @@ defmodule String.Tokenizer.Security do
150150

151151
for codepoint <- s, into: init do
152152
hex = :io_lib.format(~c"~4.16.0B", [codepoint])
153-
" \\u#{hex} #{[codepoint]} #{dir(codepoint)}\n"
153+
" \\u#{hex} #{[codepoint]} #{String.Tokenizer.dir(codepoint)}\n"
154154
end
155155
end
156156

@@ -163,29 +163,29 @@ defmodule String.Tokenizer.Security do
163163
# the [...] stack of the [unicode bidi algo]'
164164
def unbidify(chars) when is_list(chars) do
165165
{neutrals, direction, last_part, acc} =
166-
chars
167-
|> Enum.map(&{&1, dir(&1)})
168-
|> Enum.reduce({[], :ltr, [], []}, fn
166+
Enum.reduce(chars, {[], :ltr, [], []}, fn head, {neutrals, part_dir, part, acc} ->
169167
# https://www.unicode.org/reports/tr9/#W2
170-
{head, :weak_number}, {neutrals, part_dir, part, acc} ->
171-
{[], part_dir, [head] ++ neutrals ++ part, acc}
168+
case String.Tokenizer.dir(head) do
169+
:weak_number ->
170+
{[], part_dir, [head] ++ neutrals ++ part, acc}
172171

173-
{head, :neutral}, {neutrals, part_dir, part, acc} ->
174-
{[head | neutrals], part_dir, part, acc}
172+
:neutral ->
173+
{[head | neutrals], part_dir, part, acc}
175174

176-
{head, part_dir}, {neutrals, part_dir, part, acc} ->
177-
{[], part_dir, [head | neutrals] ++ part, acc}
175+
^part_dir ->
176+
{[], part_dir, [head | neutrals] ++ part, acc}
178177

179-
{head, :ltr = head_dir}, {neutrals, :rtl, part, acc} ->
180-
{[], head_dir, [head | neutrals], maybe_reverse(:rtl, part) ++ acc}
178+
:ltr when part_dir == :rtl ->
179+
{[], :ltr, [head | neutrals], Enum.reverse(part, acc)}
181180

182-
{head, :rtl = head_dir}, {neutrals, :ltr, part, acc} ->
183-
{[], head_dir, [head], maybe_reverse(:ltr, neutrals ++ part) ++ acc}
181+
:rtl when part_dir == :ltr ->
182+
{[], :rtl, [head], neutrals ++ part ++ acc}
183+
end
184184
end)
185185

186-
Enum.reverse(maybe_reverse(direction, neutrals ++ last_part) ++ acc)
186+
case direction do
187+
:ltr -> Enum.reverse(acc, Enum.reverse(neutrals ++ last_part))
188+
:rtl -> Enum.reverse(acc, neutrals ++ last_part)
189+
end
187190
end
188-
189-
defp maybe_reverse(:rtl, part), do: Enum.reverse(part)
190-
defp maybe_reverse(:ltr, part), do: part
191191
end

lib/elixir/unicode/tokenizer.ex

-1
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,6 @@ defmodule String.Tokenizer do
350350
end
351351

352352
def dir(i) when is_integer(i), do: :ltr
353-
def dir(_), do: {:error, :codepoint_must_be_integer}
354353

355354
# Hard-coded normalizations. Also split by upper, start, continue.
356355

0 commit comments

Comments
 (0)