Skip to content

Commit 0782c74

Browse files
committed
implement version-sorting algorithm for rust identifiers
The algorithm is described in the [style guide] and was introduced in `r-l/rust 115046`. [style guide]: https://doc.rust-lang.org/nightly/style-guide/#sorting
1 parent 4489061 commit 0782c74

File tree

2 files changed

+367
-0
lines changed

2 files changed

+367
-0
lines changed

Diff for: src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ mod rewrite;
9393
pub(crate) mod rustfmt_diff;
9494
mod shape;
9595
mod skip;
96+
mod sort;
9697
pub(crate) mod source_file;
9798
pub(crate) mod source_map;
9899
mod spanned;

Diff for: src/sort.rs

+366
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,366 @@
1+
use itertools::EitherOrBoth;
2+
use itertools::Itertools;
3+
4+
/// Iterator which breaks an identifier into various [VersionChunk]s.
5+
struct VersionChunkIter<'a> {
6+
ident: &'a str,
7+
start: usize,
8+
}
9+
10+
impl<'a> VersionChunkIter<'a> {
11+
pub(crate) fn new(ident: &'a str) -> Self {
12+
Self { ident, start: 0 }
13+
}
14+
15+
fn parse_numeric_chunk(
16+
&mut self,
17+
mut chars: std::str::CharIndices<'a>,
18+
) -> Option<VersionChunk<'a>> {
19+
let mut end = self.start;
20+
let mut is_end_of_chunk = false;
21+
22+
while let Some((idx, c)) = chars.next() {
23+
end = self.start + idx;
24+
25+
if c.is_ascii_digit() {
26+
continue;
27+
}
28+
29+
is_end_of_chunk = true;
30+
break;
31+
}
32+
33+
let source = if is_end_of_chunk {
34+
let value = &self.ident[self.start..end];
35+
self.start = end;
36+
value
37+
} else {
38+
let value = &self.ident[self.start..];
39+
self.start = self.ident.len();
40+
value
41+
};
42+
43+
let zeros = source.chars().take_while(|c| *c == '0').count();
44+
let value = source.parse::<usize>().ok()?;
45+
46+
Some(VersionChunk::Number {
47+
value,
48+
zeros,
49+
source,
50+
})
51+
}
52+
53+
fn parse_str_chunk(
54+
&mut self,
55+
mut chars: std::str::CharIndices<'a>,
56+
) -> Option<VersionChunk<'a>> {
57+
let mut end = self.start;
58+
let mut is_end_of_chunk = false;
59+
60+
while let Some((idx, c)) = chars.next() {
61+
end = self.start + idx;
62+
63+
if c == '_' {
64+
is_end_of_chunk = true;
65+
break;
66+
}
67+
68+
if !c.is_numeric() {
69+
continue;
70+
}
71+
72+
is_end_of_chunk = true;
73+
break;
74+
}
75+
76+
let source = if is_end_of_chunk {
77+
let value = &self.ident[self.start..end];
78+
self.start = end;
79+
value
80+
} else {
81+
let value = &self.ident[self.start..];
82+
self.start = self.ident.len();
83+
value
84+
};
85+
86+
Some(VersionChunk::Str(source))
87+
}
88+
}
89+
90+
impl<'a> Iterator for VersionChunkIter<'a> {
91+
type Item = VersionChunk<'a>;
92+
93+
fn next(&mut self) -> Option<Self::Item> {
94+
let mut chars = self.ident[self.start..].char_indices();
95+
let (_, next) = chars.next()?;
96+
97+
if next == '_' {
98+
self.start = self.start + next.len_utf8();
99+
return Some(VersionChunk::Underscore);
100+
}
101+
102+
if next.is_ascii_digit() {
103+
return self.parse_numeric_chunk(chars);
104+
}
105+
106+
self.parse_str_chunk(chars)
107+
}
108+
}
109+
110+
/// Represents a chunk in the version-sort algorithm
111+
#[derive(Debug, PartialEq, Eq)]
112+
enum VersionChunk<'a> {
113+
/// A singel `_` in an identifier. Underscores are sorted before all other characters.
114+
Underscore,
115+
/// A &str chunk in the version sort.
116+
Str(&'a str),
117+
/// A numeric chunk in the version sort. Keeps track of the numeric value and leading zeros.
118+
Number {
119+
value: usize,
120+
zeros: usize,
121+
source: &'a str,
122+
},
123+
}
124+
125+
/// Determine which side of the version-sort comparison had more leading zeros.
126+
#[derive(Debug, PartialEq, Eq)]
127+
enum MoreLeadingZeros {
128+
Left,
129+
Right,
130+
Equal,
131+
}
132+
133+
/// Compare two identifiers based on the version sorting algorithm described in [the style guide]
134+
///
135+
/// [the style guide]: https://doc.rust-lang.org/nightly/style-guide/#sorting
136+
pub(crate) fn version_sort(a: &str, b: &str) -> std::cmp::Ordering {
137+
let iter_a = VersionChunkIter::new(a);
138+
let iter_b = VersionChunkIter::new(b);
139+
let mut more_leading_zeros = MoreLeadingZeros::Equal;
140+
141+
for either_or_both in iter_a.zip_longest(iter_b) {
142+
match either_or_both {
143+
EitherOrBoth::Left(_) => return std::cmp::Ordering::Greater,
144+
EitherOrBoth::Right(_) => return std::cmp::Ordering::Less,
145+
EitherOrBoth::Both(a, b) => match (a, b) {
146+
(VersionChunk::Underscore, VersionChunk::Underscore) => {
147+
continue;
148+
}
149+
(VersionChunk::Underscore, _) => return std::cmp::Ordering::Less,
150+
(_, VersionChunk::Underscore) => return std::cmp::Ordering::Greater,
151+
(VersionChunk::Str(ca), VersionChunk::Str(cb))
152+
| (VersionChunk::Str(ca), VersionChunk::Number { source: cb, .. })
153+
| (VersionChunk::Number { source: ca, .. }, VersionChunk::Str(cb)) => {
154+
match ca.cmp(&cb) {
155+
std::cmp::Ordering::Equal => {
156+
continue;
157+
}
158+
order @ _ => return order,
159+
}
160+
}
161+
(
162+
VersionChunk::Number {
163+
value: va,
164+
zeros: lza,
165+
..
166+
},
167+
VersionChunk::Number {
168+
value: vb,
169+
zeros: lzb,
170+
..
171+
},
172+
) => match va.cmp(&vb) {
173+
std::cmp::Ordering::Equal => {
174+
if lza == lzb {
175+
continue;
176+
}
177+
178+
if more_leading_zeros == MoreLeadingZeros::Equal && lza > lzb {
179+
more_leading_zeros = MoreLeadingZeros::Left;
180+
} else if more_leading_zeros == MoreLeadingZeros::Equal && lza < lzb {
181+
more_leading_zeros = MoreLeadingZeros::Right;
182+
}
183+
continue;
184+
}
185+
order @ _ => return order,
186+
},
187+
},
188+
}
189+
}
190+
191+
match more_leading_zeros {
192+
MoreLeadingZeros::Equal => std::cmp::Ordering::Equal,
193+
MoreLeadingZeros::Left => std::cmp::Ordering::Less,
194+
MoreLeadingZeros::Right => std::cmp::Ordering::Greater,
195+
}
196+
}
197+
198+
#[cfg(test)]
199+
mod test {
200+
use super::*;
201+
202+
#[test]
203+
fn test_chunks() {
204+
let mut iter = VersionChunkIter::new("x86_128");
205+
assert_eq!(iter.next(), Some(VersionChunk::Str("x")));
206+
assert_eq!(
207+
iter.next(),
208+
Some(VersionChunk::Number {
209+
value: 86,
210+
zeros: 0,
211+
source: "86"
212+
})
213+
);
214+
assert_eq!(iter.next(), Some(VersionChunk::Underscore));
215+
assert_eq!(
216+
iter.next(),
217+
Some(VersionChunk::Number {
218+
value: 128,
219+
zeros: 0,
220+
source: "128"
221+
})
222+
);
223+
assert_eq!(iter.next(), None);
224+
225+
let mut iter = VersionChunkIter::new("w005s09t");
226+
assert_eq!(iter.next(), Some(VersionChunk::Str("w")));
227+
assert_eq!(
228+
iter.next(),
229+
Some(VersionChunk::Number {
230+
value: 5,
231+
zeros: 2,
232+
source: "005"
233+
})
234+
);
235+
assert_eq!(iter.next(), Some(VersionChunk::Str("s")));
236+
assert_eq!(
237+
iter.next(),
238+
Some(VersionChunk::Number {
239+
value: 9,
240+
zeros: 1,
241+
source: "09"
242+
})
243+
);
244+
assert_eq!(iter.next(), Some(VersionChunk::Str("t")));
245+
assert_eq!(iter.next(), None);
246+
247+
let mut iter = VersionChunkIter::new("ZY_WX");
248+
assert_eq!(iter.next(), Some(VersionChunk::Str("ZY")));
249+
assert_eq!(iter.next(), Some(VersionChunk::Underscore));
250+
assert_eq!(iter.next(), Some(VersionChunk::Str("WX")));
251+
252+
let mut iter = VersionChunkIter::new("_v1");
253+
assert_eq!(iter.next(), Some(VersionChunk::Underscore));
254+
assert_eq!(iter.next(), Some(VersionChunk::Str("v")));
255+
assert_eq!(
256+
iter.next(),
257+
Some(VersionChunk::Number {
258+
value: 1,
259+
zeros: 0,
260+
source: "1"
261+
})
262+
);
263+
264+
let mut iter = VersionChunkIter::new("_1v");
265+
assert_eq!(iter.next(), Some(VersionChunk::Underscore));
266+
assert_eq!(
267+
iter.next(),
268+
Some(VersionChunk::Number {
269+
value: 1,
270+
zeros: 0,
271+
source: "1"
272+
})
273+
);
274+
assert_eq!(iter.next(), Some(VersionChunk::Str("v")));
275+
276+
let mut iter = VersionChunkIter::new("v009");
277+
assert_eq!(iter.next(), Some(VersionChunk::Str("v")));
278+
assert_eq!(
279+
iter.next(),
280+
Some(VersionChunk::Number {
281+
value: 9,
282+
zeros: 2,
283+
source: "009"
284+
})
285+
);
286+
}
287+
288+
#[test]
289+
fn test_version_sort() {
290+
let mut input = vec!["", "b", "a"];
291+
let expected = vec!["", "a", "b"];
292+
input.sort_by(|a, b| version_sort(a, b));
293+
assert_eq!(input, expected);
294+
295+
let mut input = vec!["x7x", "xxx"];
296+
let expected = vec!["x7x", "xxx"];
297+
input.sort_by(|a, b| version_sort(a, b));
298+
assert_eq!(input, expected);
299+
300+
let mut input = vec!["applesauce", "apple"];
301+
let expected = vec!["apple", "applesauce"];
302+
input.sort_by(|a, b| version_sort(a, b));
303+
assert_eq!(input, expected);
304+
305+
let mut input = vec!["aaaaa", "aaa_a"];
306+
let expected = vec!["aaa_a", "aaaaa"];
307+
input.sort_by(|a, b| version_sort(a, b));
308+
assert_eq!(input, expected);
309+
310+
let mut input = vec!["AAAAA", "AAA1A", "BBBBB", "BB_BB", "C3CCC"];
311+
let expected = vec!["AAA1A", "AAAAA", "BB_BB", "BBBBB", "C3CCC"];
312+
input.sort_by(|a, b| version_sort(a, b));
313+
assert_eq!(input, expected);
314+
315+
let mut input = vec!["1_000_000", "1_010_001"];
316+
let expected = vec!["1_000_000", "1_010_001"];
317+
input.sort_by(|a, b| version_sort(a, b));
318+
assert_eq!(input, expected);
319+
320+
let mut input = vec![
321+
"5", "50", "500", "5_000", "5_005", "5_050", "5_500", "50_000", "50_005", "50_050",
322+
"50_500",
323+
];
324+
let expected = vec![
325+
"5", "5_000", "5_005", "5_050", "5_500", "50", "50_000", "50_005", "50_050", "50_500",
326+
"500",
327+
];
328+
input.sort_by(|a, b| version_sort(a, b));
329+
assert_eq!(input, expected);
330+
331+
let mut input = vec!["X86_64", "x86_64", "X86_128", "x86_128"];
332+
let expected = vec!["X86_64", "X86_128", "x86_64", "x86_128"];
333+
input.sort_by(|a, b| version_sort(a, b));
334+
assert_eq!(input, expected);
335+
336+
let mut input = vec!["__", "_"];
337+
let expected = vec!["_", "__"];
338+
input.sort_by(|a, b| version_sort(a, b));
339+
assert_eq!(input, expected);
340+
341+
let mut input = vec!["foo_", "foo"];
342+
let expected = vec!["foo", "foo_"];
343+
input.sort_by(|a, b| version_sort(a, b));
344+
assert_eq!(input, expected);
345+
346+
let mut input = vec!["A", "AA", "B", "a", "aA", "aa", "b"];
347+
let expected = vec!["A", "AA", "B", "a", "aA", "aa", "b"];
348+
input.sort_by(|a, b| version_sort(a, b));
349+
assert_eq!(input, expected);
350+
351+
let mut input = vec![
352+
"x87", "Z_YWX", "ZY_WX", "ZYW_X", "ZYWX", "ZYWX_", "v010", "v10", "w005s09t",
353+
"w5s009t", "x64", "x86", "x86_32", "x86_64", "x86_128", "v000", "v00", "v0", "v0s",
354+
"v00t", "v0u", "v001", "v01", "v1", "v009", "v09", "v9", "_ZYWX", "u_zzz", "u8", "u16",
355+
"u32", "u64", "u128", "u256", "ua", "usize", "uz",
356+
];
357+
let expected = vec![
358+
"_ZYWX", "Z_YWX", "ZY_WX", "ZYW_X", "ZYWX", "ZYWX_", "u_zzz", "u8", "u16", "u32",
359+
"u64", "u128", "u256", "ua", "usize", "uz", "v000", "v00", "v0", "v0s", "v00t", "v0u",
360+
"v001", "v01", "v1", "v009", "v09", "v9", "v010", "v10", "w005s09t", "w5s009t", "x64",
361+
"x86", "x86_32", "x86_64", "x86_128", "x87",
362+
];
363+
input.sort_by(|a, b| version_sort(a, b));
364+
assert_eq!(input, expected)
365+
}
366+
}

0 commit comments

Comments
 (0)