|
| 1 | +use itertools::EitherOrBoth; |
| 2 | +use itertools::Itertools; |
| 3 | + |
| 4 | +/// Iterator which breaks an identifier into various [VersionChunk]s. |
| 5 | +struct VersionChunkIter<'a> { |
| 6 | + ident: &'a str, |
| 7 | + start: usize, |
| 8 | +} |
| 9 | + |
| 10 | +impl<'a> VersionChunkIter<'a> { |
| 11 | + pub(crate) fn new(ident: &'a str) -> Self { |
| 12 | + Self { ident, start: 0 } |
| 13 | + } |
| 14 | + |
| 15 | + fn parse_numeric_chunk( |
| 16 | + &mut self, |
| 17 | + mut chars: std::str::CharIndices<'a>, |
| 18 | + ) -> Option<VersionChunk<'a>> { |
| 19 | + let mut end = self.start; |
| 20 | + let mut is_end_of_chunk = false; |
| 21 | + |
| 22 | + while let Some((idx, c)) = chars.next() { |
| 23 | + end = self.start + idx; |
| 24 | + |
| 25 | + if c.is_ascii_digit() { |
| 26 | + continue; |
| 27 | + } |
| 28 | + |
| 29 | + is_end_of_chunk = true; |
| 30 | + break; |
| 31 | + } |
| 32 | + |
| 33 | + let source = if is_end_of_chunk { |
| 34 | + let value = &self.ident[self.start..end]; |
| 35 | + self.start = end; |
| 36 | + value |
| 37 | + } else { |
| 38 | + let value = &self.ident[self.start..]; |
| 39 | + self.start = self.ident.len(); |
| 40 | + value |
| 41 | + }; |
| 42 | + |
| 43 | + let zeros = source.chars().take_while(|c| *c == '0').count(); |
| 44 | + let value = source.parse::<usize>().ok()?; |
| 45 | + |
| 46 | + Some(VersionChunk::Number { |
| 47 | + value, |
| 48 | + zeros, |
| 49 | + source, |
| 50 | + }) |
| 51 | + } |
| 52 | + |
| 53 | + fn parse_str_chunk( |
| 54 | + &mut self, |
| 55 | + mut chars: std::str::CharIndices<'a>, |
| 56 | + ) -> Option<VersionChunk<'a>> { |
| 57 | + let mut end = self.start; |
| 58 | + let mut is_end_of_chunk = false; |
| 59 | + |
| 60 | + while let Some((idx, c)) = chars.next() { |
| 61 | + end = self.start + idx; |
| 62 | + |
| 63 | + if c == '_' { |
| 64 | + is_end_of_chunk = true; |
| 65 | + break; |
| 66 | + } |
| 67 | + |
| 68 | + if !c.is_numeric() { |
| 69 | + continue; |
| 70 | + } |
| 71 | + |
| 72 | + is_end_of_chunk = true; |
| 73 | + break; |
| 74 | + } |
| 75 | + |
| 76 | + let source = if is_end_of_chunk { |
| 77 | + let value = &self.ident[self.start..end]; |
| 78 | + self.start = end; |
| 79 | + value |
| 80 | + } else { |
| 81 | + let value = &self.ident[self.start..]; |
| 82 | + self.start = self.ident.len(); |
| 83 | + value |
| 84 | + }; |
| 85 | + |
| 86 | + Some(VersionChunk::Str(source)) |
| 87 | + } |
| 88 | +} |
| 89 | + |
| 90 | +impl<'a> Iterator for VersionChunkIter<'a> { |
| 91 | + type Item = VersionChunk<'a>; |
| 92 | + |
| 93 | + fn next(&mut self) -> Option<Self::Item> { |
| 94 | + let mut chars = self.ident[self.start..].char_indices(); |
| 95 | + let (_, next) = chars.next()?; |
| 96 | + |
| 97 | + if next == '_' { |
| 98 | + self.start = self.start + next.len_utf8(); |
| 99 | + return Some(VersionChunk::Underscore); |
| 100 | + } |
| 101 | + |
| 102 | + if next.is_ascii_digit() { |
| 103 | + return self.parse_numeric_chunk(chars); |
| 104 | + } |
| 105 | + |
| 106 | + self.parse_str_chunk(chars) |
| 107 | + } |
| 108 | +} |
| 109 | + |
| 110 | +/// Represents a chunk in the version-sort algorithm |
| 111 | +#[derive(Debug, PartialEq, Eq)] |
| 112 | +enum VersionChunk<'a> { |
| 113 | + /// A singel `_` in an identifier. Underscores are sorted before all other characters. |
| 114 | + Underscore, |
| 115 | + /// A &str chunk in the version sort. |
| 116 | + Str(&'a str), |
| 117 | + /// A numeric chunk in the version sort. Keeps track of the numeric value and leading zeros. |
| 118 | + Number { |
| 119 | + value: usize, |
| 120 | + zeros: usize, |
| 121 | + source: &'a str, |
| 122 | + }, |
| 123 | +} |
| 124 | + |
| 125 | +/// Determine which side of the version-sort comparison had more leading zeros. |
| 126 | +#[derive(Debug, PartialEq, Eq)] |
| 127 | +enum MoreLeadingZeros { |
| 128 | + Left, |
| 129 | + Right, |
| 130 | + Equal, |
| 131 | +} |
| 132 | + |
| 133 | +/// Compare two identifiers based on the version sorting algorithm described in [the style guide] |
| 134 | +/// |
| 135 | +/// [the style guide]: https://doc.rust-lang.org/nightly/style-guide/#sorting |
| 136 | +pub(crate) fn version_sort(a: &str, b: &str) -> std::cmp::Ordering { |
| 137 | + let iter_a = VersionChunkIter::new(a); |
| 138 | + let iter_b = VersionChunkIter::new(b); |
| 139 | + let mut more_leading_zeros = MoreLeadingZeros::Equal; |
| 140 | + |
| 141 | + for either_or_both in iter_a.zip_longest(iter_b) { |
| 142 | + match either_or_both { |
| 143 | + EitherOrBoth::Left(_) => return std::cmp::Ordering::Greater, |
| 144 | + EitherOrBoth::Right(_) => return std::cmp::Ordering::Less, |
| 145 | + EitherOrBoth::Both(a, b) => match (a, b) { |
| 146 | + (VersionChunk::Underscore, VersionChunk::Underscore) => { |
| 147 | + continue; |
| 148 | + } |
| 149 | + (VersionChunk::Underscore, _) => return std::cmp::Ordering::Less, |
| 150 | + (_, VersionChunk::Underscore) => return std::cmp::Ordering::Greater, |
| 151 | + (VersionChunk::Str(ca), VersionChunk::Str(cb)) |
| 152 | + | (VersionChunk::Str(ca), VersionChunk::Number { source: cb, .. }) |
| 153 | + | (VersionChunk::Number { source: ca, .. }, VersionChunk::Str(cb)) => { |
| 154 | + match ca.cmp(&cb) { |
| 155 | + std::cmp::Ordering::Equal => { |
| 156 | + continue; |
| 157 | + } |
| 158 | + order @ _ => return order, |
| 159 | + } |
| 160 | + } |
| 161 | + ( |
| 162 | + VersionChunk::Number { |
| 163 | + value: va, |
| 164 | + zeros: lza, |
| 165 | + .. |
| 166 | + }, |
| 167 | + VersionChunk::Number { |
| 168 | + value: vb, |
| 169 | + zeros: lzb, |
| 170 | + .. |
| 171 | + }, |
| 172 | + ) => match va.cmp(&vb) { |
| 173 | + std::cmp::Ordering::Equal => { |
| 174 | + if lza == lzb { |
| 175 | + continue; |
| 176 | + } |
| 177 | + |
| 178 | + if more_leading_zeros == MoreLeadingZeros::Equal && lza > lzb { |
| 179 | + more_leading_zeros = MoreLeadingZeros::Left; |
| 180 | + } else if more_leading_zeros == MoreLeadingZeros::Equal && lza < lzb { |
| 181 | + more_leading_zeros = MoreLeadingZeros::Right; |
| 182 | + } |
| 183 | + continue; |
| 184 | + } |
| 185 | + order @ _ => return order, |
| 186 | + }, |
| 187 | + }, |
| 188 | + } |
| 189 | + } |
| 190 | + |
| 191 | + match more_leading_zeros { |
| 192 | + MoreLeadingZeros::Equal => std::cmp::Ordering::Equal, |
| 193 | + MoreLeadingZeros::Left => std::cmp::Ordering::Less, |
| 194 | + MoreLeadingZeros::Right => std::cmp::Ordering::Greater, |
| 195 | + } |
| 196 | +} |
| 197 | + |
| 198 | +#[cfg(test)] |
| 199 | +mod test { |
| 200 | + use super::*; |
| 201 | + |
| 202 | + #[test] |
| 203 | + fn test_chunks() { |
| 204 | + let mut iter = VersionChunkIter::new("x86_128"); |
| 205 | + assert_eq!(iter.next(), Some(VersionChunk::Str("x"))); |
| 206 | + assert_eq!( |
| 207 | + iter.next(), |
| 208 | + Some(VersionChunk::Number { |
| 209 | + value: 86, |
| 210 | + zeros: 0, |
| 211 | + source: "86" |
| 212 | + }) |
| 213 | + ); |
| 214 | + assert_eq!(iter.next(), Some(VersionChunk::Underscore)); |
| 215 | + assert_eq!( |
| 216 | + iter.next(), |
| 217 | + Some(VersionChunk::Number { |
| 218 | + value: 128, |
| 219 | + zeros: 0, |
| 220 | + source: "128" |
| 221 | + }) |
| 222 | + ); |
| 223 | + assert_eq!(iter.next(), None); |
| 224 | + |
| 225 | + let mut iter = VersionChunkIter::new("w005s09t"); |
| 226 | + assert_eq!(iter.next(), Some(VersionChunk::Str("w"))); |
| 227 | + assert_eq!( |
| 228 | + iter.next(), |
| 229 | + Some(VersionChunk::Number { |
| 230 | + value: 5, |
| 231 | + zeros: 2, |
| 232 | + source: "005" |
| 233 | + }) |
| 234 | + ); |
| 235 | + assert_eq!(iter.next(), Some(VersionChunk::Str("s"))); |
| 236 | + assert_eq!( |
| 237 | + iter.next(), |
| 238 | + Some(VersionChunk::Number { |
| 239 | + value: 9, |
| 240 | + zeros: 1, |
| 241 | + source: "09" |
| 242 | + }) |
| 243 | + ); |
| 244 | + assert_eq!(iter.next(), Some(VersionChunk::Str("t"))); |
| 245 | + assert_eq!(iter.next(), None); |
| 246 | + |
| 247 | + let mut iter = VersionChunkIter::new("ZY_WX"); |
| 248 | + assert_eq!(iter.next(), Some(VersionChunk::Str("ZY"))); |
| 249 | + assert_eq!(iter.next(), Some(VersionChunk::Underscore)); |
| 250 | + assert_eq!(iter.next(), Some(VersionChunk::Str("WX"))); |
| 251 | + |
| 252 | + let mut iter = VersionChunkIter::new("_v1"); |
| 253 | + assert_eq!(iter.next(), Some(VersionChunk::Underscore)); |
| 254 | + assert_eq!(iter.next(), Some(VersionChunk::Str("v"))); |
| 255 | + assert_eq!( |
| 256 | + iter.next(), |
| 257 | + Some(VersionChunk::Number { |
| 258 | + value: 1, |
| 259 | + zeros: 0, |
| 260 | + source: "1" |
| 261 | + }) |
| 262 | + ); |
| 263 | + |
| 264 | + let mut iter = VersionChunkIter::new("_1v"); |
| 265 | + assert_eq!(iter.next(), Some(VersionChunk::Underscore)); |
| 266 | + assert_eq!( |
| 267 | + iter.next(), |
| 268 | + Some(VersionChunk::Number { |
| 269 | + value: 1, |
| 270 | + zeros: 0, |
| 271 | + source: "1" |
| 272 | + }) |
| 273 | + ); |
| 274 | + assert_eq!(iter.next(), Some(VersionChunk::Str("v"))); |
| 275 | + |
| 276 | + let mut iter = VersionChunkIter::new("v009"); |
| 277 | + assert_eq!(iter.next(), Some(VersionChunk::Str("v"))); |
| 278 | + assert_eq!( |
| 279 | + iter.next(), |
| 280 | + Some(VersionChunk::Number { |
| 281 | + value: 9, |
| 282 | + zeros: 2, |
| 283 | + source: "009" |
| 284 | + }) |
| 285 | + ); |
| 286 | + } |
| 287 | + |
| 288 | + #[test] |
| 289 | + fn test_version_sort() { |
| 290 | + let mut input = vec!["", "b", "a"]; |
| 291 | + let expected = vec!["", "a", "b"]; |
| 292 | + input.sort_by(|a, b| version_sort(a, b)); |
| 293 | + assert_eq!(input, expected); |
| 294 | + |
| 295 | + let mut input = vec!["x7x", "xxx"]; |
| 296 | + let expected = vec!["x7x", "xxx"]; |
| 297 | + input.sort_by(|a, b| version_sort(a, b)); |
| 298 | + assert_eq!(input, expected); |
| 299 | + |
| 300 | + let mut input = vec!["applesauce", "apple"]; |
| 301 | + let expected = vec!["apple", "applesauce"]; |
| 302 | + input.sort_by(|a, b| version_sort(a, b)); |
| 303 | + assert_eq!(input, expected); |
| 304 | + |
| 305 | + let mut input = vec!["aaaaa", "aaa_a"]; |
| 306 | + let expected = vec!["aaa_a", "aaaaa"]; |
| 307 | + input.sort_by(|a, b| version_sort(a, b)); |
| 308 | + assert_eq!(input, expected); |
| 309 | + |
| 310 | + let mut input = vec!["AAAAA", "AAA1A", "BBBBB", "BB_BB", "C3CCC"]; |
| 311 | + let expected = vec!["AAA1A", "AAAAA", "BB_BB", "BBBBB", "C3CCC"]; |
| 312 | + input.sort_by(|a, b| version_sort(a, b)); |
| 313 | + assert_eq!(input, expected); |
| 314 | + |
| 315 | + let mut input = vec!["1_000_000", "1_010_001"]; |
| 316 | + let expected = vec!["1_000_000", "1_010_001"]; |
| 317 | + input.sort_by(|a, b| version_sort(a, b)); |
| 318 | + assert_eq!(input, expected); |
| 319 | + |
| 320 | + let mut input = vec![ |
| 321 | + "5", "50", "500", "5_000", "5_005", "5_050", "5_500", "50_000", "50_005", "50_050", |
| 322 | + "50_500", |
| 323 | + ]; |
| 324 | + let expected = vec![ |
| 325 | + "5", "5_000", "5_005", "5_050", "5_500", "50", "50_000", "50_005", "50_050", "50_500", |
| 326 | + "500", |
| 327 | + ]; |
| 328 | + input.sort_by(|a, b| version_sort(a, b)); |
| 329 | + assert_eq!(input, expected); |
| 330 | + |
| 331 | + let mut input = vec!["X86_64", "x86_64", "X86_128", "x86_128"]; |
| 332 | + let expected = vec!["X86_64", "X86_128", "x86_64", "x86_128"]; |
| 333 | + input.sort_by(|a, b| version_sort(a, b)); |
| 334 | + assert_eq!(input, expected); |
| 335 | + |
| 336 | + let mut input = vec!["__", "_"]; |
| 337 | + let expected = vec!["_", "__"]; |
| 338 | + input.sort_by(|a, b| version_sort(a, b)); |
| 339 | + assert_eq!(input, expected); |
| 340 | + |
| 341 | + let mut input = vec!["foo_", "foo"]; |
| 342 | + let expected = vec!["foo", "foo_"]; |
| 343 | + input.sort_by(|a, b| version_sort(a, b)); |
| 344 | + assert_eq!(input, expected); |
| 345 | + |
| 346 | + let mut input = vec!["A", "AA", "B", "a", "aA", "aa", "b"]; |
| 347 | + let expected = vec!["A", "AA", "B", "a", "aA", "aa", "b"]; |
| 348 | + input.sort_by(|a, b| version_sort(a, b)); |
| 349 | + assert_eq!(input, expected); |
| 350 | + |
| 351 | + let mut input = vec![ |
| 352 | + "x87", "Z_YWX", "ZY_WX", "ZYW_X", "ZYWX", "ZYWX_", "v010", "v10", "w005s09t", |
| 353 | + "w5s009t", "x64", "x86", "x86_32", "x86_64", "x86_128", "v000", "v00", "v0", "v0s", |
| 354 | + "v00t", "v0u", "v001", "v01", "v1", "v009", "v09", "v9", "_ZYWX", "u_zzz", "u8", "u16", |
| 355 | + "u32", "u64", "u128", "u256", "ua", "usize", "uz", |
| 356 | + ]; |
| 357 | + let expected = vec![ |
| 358 | + "_ZYWX", "Z_YWX", "ZY_WX", "ZYW_X", "ZYWX", "ZYWX_", "u_zzz", "u8", "u16", "u32", |
| 359 | + "u64", "u128", "u256", "ua", "usize", "uz", "v000", "v00", "v0", "v0s", "v00t", "v0u", |
| 360 | + "v001", "v01", "v1", "v009", "v09", "v9", "v010", "v10", "w005s09t", "w5s009t", "x64", |
| 361 | + "x86", "x86_32", "x86_64", "x86_128", "x87", |
| 362 | + ]; |
| 363 | + input.sort_by(|a, b| version_sort(a, b)); |
| 364 | + assert_eq!(input, expected) |
| 365 | + } |
| 366 | +} |
0 commit comments