Skip to content

Commit 9cd0cde

Browse files
authored
Assert that formatted code doesn't introduce any new unsupported syntax errors (#16549)
## Summary This should give us better coverage for the unsupported syntax error features and increases our confidence that the formatter doesn't accidentially introduce new unsupported syntax errors. A feature like this would have been very useful when working on f-string formatting where it took a lot of iteration to find all Python 3.11 or older incompatibilities. ## Test Plan I applied my changes on top of #16523 and removed the target version check in the with-statement formatting code. As expected, the integration tests now failed
1 parent 05a4c29 commit 9cd0cde

File tree

2 files changed

+111
-20
lines changed

2 files changed

+111
-20
lines changed

crates/ruff_python_formatter/tests/fixtures.rs

Lines changed: 102 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,22 @@
1+
use crate::normalizer::Normalizer;
2+
use itertools::Itertools;
3+
use ruff_formatter::FormatOptions;
4+
use ruff_python_ast::comparable::ComparableMod;
5+
use ruff_python_formatter::{format_module_source, format_range, PreviewMode, PyFormatOptions};
6+
use ruff_python_parser::{parse, ParseOptions, UnsupportedSyntaxError};
7+
use ruff_source_file::{LineIndex, OneIndexed};
8+
use ruff_text_size::{Ranged, TextRange, TextSize};
9+
use rustc_hash::FxHashMap;
10+
use similar::TextDiff;
111
use std::borrow::Cow;
12+
use std::collections::hash_map::Entry;
213
use std::fmt::{Formatter, Write};
14+
use std::hash::{DefaultHasher, Hash, Hasher};
315
use std::io::BufReader;
416
use std::ops::Range;
517
use std::path::Path;
618
use std::{fmt, fs};
719

8-
use similar::TextDiff;
9-
10-
use crate::normalizer::Normalizer;
11-
use ruff_formatter::FormatOptions;
12-
use ruff_python_ast::comparable::ComparableMod;
13-
use ruff_python_formatter::{format_module_source, format_range, PreviewMode, PyFormatOptions};
14-
use ruff_python_parser::{parse, ParseOptions};
15-
use ruff_source_file::{LineIndex, OneIndexed};
16-
use ruff_text_size::{TextRange, TextSize};
17-
1820
mod normalizer;
1921

2022
#[test]
@@ -379,7 +381,7 @@ Formatted twice:
379381
}
380382
}
381383

382-
/// Ensure that formatting doesn't change the AST.
384+
/// Ensure that formatting doesn't change the AST and doesn't introduce any new unsupported syntax errors.
383385
///
384386
/// Like Black, there are a few exceptions to this "invariant" which are encoded in
385387
/// [`NormalizedMod`] and related structs. Namely, formatting can change indentation within strings,
@@ -393,16 +395,53 @@ fn ensure_unchanged_ast(
393395
let source_type = options.source_type();
394396

395397
// Parse the unformatted code.
396-
let mut unformatted_ast = parse(unformatted_code, ParseOptions::from(source_type))
397-
.expect("Unformatted code to be valid syntax")
398-
.into_syntax();
398+
let unformatted_parsed = parse(
399+
unformatted_code,
400+
ParseOptions::from(source_type).with_target_version(options.target_version()),
401+
)
402+
.expect("Unformatted code to be valid syntax");
403+
404+
let unformatted_unsupported_syntax_errors =
405+
collect_unsupported_syntax_errors(unformatted_parsed.unsupported_syntax_errors());
406+
let mut unformatted_ast = unformatted_parsed.into_syntax();
407+
399408
Normalizer.visit_module(&mut unformatted_ast);
400409
let unformatted_ast = ComparableMod::from(&unformatted_ast);
401410

402411
// Parse the formatted code.
403-
let mut formatted_ast = parse(formatted_code, ParseOptions::from(source_type))
404-
.expect("Formatted code to be valid syntax")
405-
.into_syntax();
412+
let formatted_parsed = parse(
413+
formatted_code,
414+
ParseOptions::from(source_type).with_target_version(options.target_version()),
415+
)
416+
.expect("Formatted code to be valid syntax");
417+
418+
// Assert that there are no new unsupported syntax errors
419+
let mut formatted_unsupported_syntax_errors =
420+
collect_unsupported_syntax_errors(formatted_parsed.unsupported_syntax_errors());
421+
422+
formatted_unsupported_syntax_errors
423+
.retain(|fingerprint, _| !unformatted_unsupported_syntax_errors.contains_key(fingerprint));
424+
425+
if !formatted_unsupported_syntax_errors.is_empty() {
426+
let index = LineIndex::from_source_text(formatted_code);
427+
panic!(
428+
"Formatted code `{}` introduced new unsupported syntax errors:\n---\n{}\n---",
429+
input_path.display(),
430+
formatted_unsupported_syntax_errors
431+
.into_values()
432+
.map(|error| {
433+
let location = index.source_location(error.start(), formatted_code);
434+
format!(
435+
"{row}:{col} {error}",
436+
row = location.row,
437+
col = location.column
438+
)
439+
})
440+
.join("\n")
441+
);
442+
}
443+
444+
let mut formatted_ast = formatted_parsed.into_syntax();
406445
Normalizer.visit_module(&mut formatted_ast);
407446
let formatted_ast = ComparableMod::from(&formatted_ast);
408447

@@ -492,3 +531,49 @@ source_type = {source_type:?}"#,
492531
)
493532
}
494533
}
534+
535+
/// Collects the unsupported syntax errors and assigns a unique hash to each error.
536+
fn collect_unsupported_syntax_errors(
537+
errors: &[UnsupportedSyntaxError],
538+
) -> FxHashMap<u64, UnsupportedSyntaxError> {
539+
let mut collected = FxHashMap::default();
540+
541+
for error in errors {
542+
let mut error_fingerprint = fingerprint_unsupported_syntax_error(error, 0);
543+
544+
// Make sure that we do not get a fingerprint that is already in use
545+
// by adding in the previously generated one.
546+
loop {
547+
match collected.entry(error_fingerprint) {
548+
Entry::Occupied(_) => {
549+
error_fingerprint =
550+
fingerprint_unsupported_syntax_error(error, error_fingerprint);
551+
}
552+
Entry::Vacant(entry) => {
553+
entry.insert(error.clone());
554+
break;
555+
}
556+
}
557+
}
558+
}
559+
560+
collected
561+
}
562+
563+
fn fingerprint_unsupported_syntax_error(error: &UnsupportedSyntaxError, salt: u64) -> u64 {
564+
let mut hasher = DefaultHasher::new();
565+
566+
let UnsupportedSyntaxError {
567+
kind,
568+
target_version,
569+
// Don't hash the range because the location between the formatted and unformatted code
570+
// is likely to be different
571+
range: _,
572+
} = error;
573+
574+
salt.hash(&mut hasher);
575+
kind.hash(&mut hasher);
576+
target_version.hash(&mut hasher);
577+
578+
hasher.finish()
579+
}

crates/ruff_python_parser/src/error.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::fmt::{self, Display};
22

33
use ruff_python_ast::PythonVersion;
4-
use ruff_text_size::TextRange;
4+
use ruff_text_size::{Ranged, TextRange};
55

66
use crate::TokenKind;
77

@@ -439,14 +439,20 @@ pub struct UnsupportedSyntaxError {
439439
pub target_version: PythonVersion,
440440
}
441441

442+
impl Ranged for UnsupportedSyntaxError {
443+
fn range(&self) -> TextRange {
444+
self.range
445+
}
446+
}
447+
442448
/// The type of tuple unpacking for [`UnsupportedSyntaxErrorKind::StarTuple`].
443-
#[derive(Debug, PartialEq, Clone, Copy)]
449+
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
444450
pub enum StarTupleKind {
445451
Return,
446452
Yield,
447453
}
448454

449-
#[derive(Debug, PartialEq, Clone, Copy)]
455+
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
450456
pub enum UnsupportedSyntaxErrorKind {
451457
Match,
452458
Walrus,

0 commit comments

Comments
 (0)