Skip to content

Commit f27a8b8

Browse files
authored
[internal] ComparableExpr (f)strings and bytes made invariant under concatenation (#13301)
1 parent ca0ae0a commit f27a8b8

File tree

3 files changed

+174
-39
lines changed

3 files changed

+174
-39
lines changed

crates/ruff_python_ast/src/comparable.rs

+110-39
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
//! an implicit concatenation of string literals, as these expressions are considered to
1616
//! have the same shape in that they evaluate to the same value.
1717
18+
use std::borrow::Cow;
19+
1820
use crate as ast;
1921

2022
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
@@ -511,7 +513,7 @@ impl<'a> From<&'a ast::ExceptHandler> for ComparableExceptHandler<'a> {
511513

512514
#[derive(Debug, PartialEq, Eq, Hash)]
513515
pub enum ComparableFStringElement<'a> {
514-
Literal(&'a str),
516+
Literal(Cow<'a, str>),
515517
FStringExpressionElement(FStringExpressionElement<'a>),
516518
}
517519

@@ -527,23 +529,34 @@ impl<'a> From<&'a ast::FStringElement> for ComparableFStringElement<'a> {
527529
fn from(fstring_element: &'a ast::FStringElement) -> Self {
528530
match fstring_element {
529531
ast::FStringElement::Literal(ast::FStringLiteralElement { value, .. }) => {
530-
Self::Literal(value)
531-
}
532-
ast::FStringElement::Expression(formatted_value) => {
533-
Self::FStringExpressionElement(FStringExpressionElement {
534-
expression: (&formatted_value.expression).into(),
535-
debug_text: formatted_value.debug_text.as_ref(),
536-
conversion: formatted_value.conversion,
537-
format_spec: formatted_value
538-
.format_spec
539-
.as_ref()
540-
.map(|spec| spec.elements.iter().map(Into::into).collect()),
541-
})
532+
Self::Literal(value.as_ref().into())
542533
}
534+
ast::FStringElement::Expression(formatted_value) => formatted_value.into(),
543535
}
544536
}
545537
}
546538

539+
impl<'a> From<&'a ast::FStringExpressionElement> for ComparableFStringElement<'a> {
540+
fn from(fstring_expression_element: &'a ast::FStringExpressionElement) -> Self {
541+
let ast::FStringExpressionElement {
542+
expression,
543+
debug_text,
544+
conversion,
545+
format_spec,
546+
range: _,
547+
} = fstring_expression_element;
548+
549+
Self::FStringExpressionElement(FStringExpressionElement {
550+
expression: (expression).into(),
551+
debug_text: debug_text.as_ref(),
552+
conversion: *conversion,
553+
format_spec: format_spec
554+
.as_ref()
555+
.map(|spec| spec.elements.iter().map(Into::into).collect()),
556+
})
557+
}
558+
}
559+
547560
#[derive(Debug, PartialEq, Eq, Hash)]
548561
pub struct ComparableElifElseClause<'a> {
549562
test: Option<ComparableExpr<'a>>,
@@ -597,28 +610,82 @@ impl<'a> From<ast::LiteralExpressionRef<'a>> for ComparableLiteral<'a> {
597610

598611
#[derive(Debug, PartialEq, Eq, Hash)]
599612
pub struct ComparableFString<'a> {
600-
elements: Vec<ComparableFStringElement<'a>>,
601-
}
613+
elements: Box<[ComparableFStringElement<'a>]>,
614+
}
615+
616+
impl<'a> From<&'a ast::FStringValue> for ComparableFString<'a> {
617+
// The approach below is somewhat complicated, so it may
618+
// require some justification.
619+
//
620+
// Suppose given an f-string of the form
621+
// `f"{foo!r} one" " and two " f" and three {bar!s}"`
622+
// This decomposes as:
623+
// - An `FStringPart::FString`, `f"{foo!r} one"` with elements
624+
// - `FStringElement::Expression` encoding `{foo!r}`
625+
// - `FStringElement::Literal` encoding " one"
626+
// - An `FStringPart::Literal` capturing `" and two "`
627+
// - An `FStringPart::FString`, `f" and three {bar!s}"` with elements
628+
// - `FStringElement::Literal` encoding " and three "
629+
// - `FStringElement::Expression` encoding `{bar!s}`
630+
//
631+
// We would like to extract from this a vector of (comparable) f-string
632+
// _elements_ which alternate between expression elements and literal
633+
// elements. In order to do so, we need to concatenate adjacent string
634+
// literals. String literals may be separated for two reasons: either
635+
// they appear in adjacent string literal parts, or else a string literal
636+
// part is adjacent to a string literal _element_ inside of an f-string part.
637+
fn from(value: &'a ast::FStringValue) -> Self {
638+
#[derive(Default)]
639+
struct Collector<'a> {
640+
elements: Vec<ComparableFStringElement<'a>>,
641+
}
602642

603-
impl<'a> From<&'a ast::FString> for ComparableFString<'a> {
604-
fn from(fstring: &'a ast::FString) -> Self {
605-
Self {
606-
elements: fstring.elements.iter().map(Into::into).collect(),
643+
impl<'a> Collector<'a> {
644+
// The logic for concatenating adjacent string literals
645+
// occurs here, implicitly: when we encounter a sequence
646+
// of string literals, the first gets pushed to the
647+
// `elements` vector, while subsequent strings
648+
// are concatenated onto this top string.
649+
fn push_literal(&mut self, literal: &'a str) {
650+
if let Some(ComparableFStringElement::Literal(existing_literal)) =
651+
self.elements.last_mut()
652+
{
653+
existing_literal.to_mut().push_str(literal);
654+
} else {
655+
self.elements
656+
.push(ComparableFStringElement::Literal(literal.into()));
657+
}
658+
}
659+
660+
fn push_expression(&mut self, expression: &'a ast::FStringExpressionElement) {
661+
self.elements.push(expression.into());
662+
}
607663
}
608-
}
609-
}
610664

611-
#[derive(Debug, PartialEq, Eq, Hash)]
612-
pub enum ComparableFStringPart<'a> {
613-
Literal(ComparableStringLiteral<'a>),
614-
FString(ComparableFString<'a>),
615-
}
665+
let mut collector = Collector::default();
666+
667+
for part in value {
668+
match part {
669+
ast::FStringPart::Literal(string_literal) => {
670+
collector.push_literal(&string_literal.value);
671+
}
672+
ast::FStringPart::FString(fstring) => {
673+
for element in &fstring.elements {
674+
match element {
675+
ast::FStringElement::Literal(literal) => {
676+
collector.push_literal(&literal.value);
677+
}
678+
ast::FStringElement::Expression(expression) => {
679+
collector.push_expression(expression);
680+
}
681+
}
682+
}
683+
}
684+
}
685+
}
616686

617-
impl<'a> From<&'a ast::FStringPart> for ComparableFStringPart<'a> {
618-
fn from(f_string_part: &'a ast::FStringPart) -> Self {
619-
match f_string_part {
620-
ast::FStringPart::Literal(string_literal) => Self::Literal(string_literal.into()),
621-
ast::FStringPart::FString(f_string) => Self::FString(f_string.into()),
687+
Self {
688+
elements: collector.elements.into_boxed_slice(),
622689
}
623690
}
624691
}
@@ -638,13 +705,13 @@ impl<'a> From<&'a ast::StringLiteral> for ComparableStringLiteral<'a> {
638705

639706
#[derive(Debug, PartialEq, Eq, Hash)]
640707
pub struct ComparableBytesLiteral<'a> {
641-
value: &'a [u8],
708+
value: Cow<'a, [u8]>,
642709
}
643710

644711
impl<'a> From<&'a ast::BytesLiteral> for ComparableBytesLiteral<'a> {
645712
fn from(bytes_literal: &'a ast::BytesLiteral) -> Self {
646713
Self {
647-
value: &bytes_literal.value,
714+
value: Cow::Borrowed(&bytes_literal.value),
648715
}
649716
}
650717
}
@@ -775,17 +842,17 @@ pub struct ExprFStringExpressionElement<'a> {
775842

776843
#[derive(Debug, PartialEq, Eq, Hash)]
777844
pub struct ExprFString<'a> {
778-
parts: Vec<ComparableFStringPart<'a>>,
845+
value: ComparableFString<'a>,
779846
}
780847

781848
#[derive(Debug, PartialEq, Eq, Hash)]
782849
pub struct ExprStringLiteral<'a> {
783-
parts: Vec<ComparableStringLiteral<'a>>,
850+
value: ComparableStringLiteral<'a>,
784851
}
785852

786853
#[derive(Debug, PartialEq, Eq, Hash)]
787854
pub struct ExprBytesLiteral<'a> {
788-
parts: Vec<ComparableBytesLiteral<'a>>,
855+
value: ComparableBytesLiteral<'a>,
789856
}
790857

791858
#[derive(Debug, PartialEq, Eq, Hash)]
@@ -1019,17 +1086,21 @@ impl<'a> From<&'a ast::Expr> for ComparableExpr<'a> {
10191086
}),
10201087
ast::Expr::FString(ast::ExprFString { value, range: _ }) => {
10211088
Self::FString(ExprFString {
1022-
parts: value.iter().map(Into::into).collect(),
1089+
value: value.into(),
10231090
})
10241091
}
10251092
ast::Expr::StringLiteral(ast::ExprStringLiteral { value, range: _ }) => {
10261093
Self::StringLiteral(ExprStringLiteral {
1027-
parts: value.iter().map(Into::into).collect(),
1094+
value: ComparableStringLiteral {
1095+
value: value.to_str(),
1096+
},
10281097
})
10291098
}
10301099
ast::Expr::BytesLiteral(ast::ExprBytesLiteral { value, range: _ }) => {
10311100
Self::BytesLiteral(ExprBytesLiteral {
1032-
parts: value.iter().map(Into::into).collect(),
1101+
value: ComparableBytesLiteral {
1102+
value: Cow::from(value),
1103+
},
10331104
})
10341105
}
10351106
ast::Expr::NumberLiteral(ast::ExprNumberLiteral { value, range: _ }) => {

crates/ruff_python_ast/src/nodes.rs

+17
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#![allow(clippy::derive_partial_eq_without_eq)]
22

3+
use std::borrow::Cow;
34
use std::fmt;
45
use std::fmt::Debug;
56
use std::iter::FusedIterator;
@@ -2186,6 +2187,22 @@ impl PartialEq<[u8]> for BytesLiteralValue {
21862187
}
21872188
}
21882189

2190+
impl<'a> From<&'a BytesLiteralValue> for Cow<'a, [u8]> {
2191+
fn from(value: &'a BytesLiteralValue) -> Self {
2192+
match &value.inner {
2193+
BytesLiteralValueInner::Single(BytesLiteral {
2194+
value: bytes_value, ..
2195+
}) => Cow::from(bytes_value.as_ref()),
2196+
BytesLiteralValueInner::Concatenated(bytes_literal_vec) => Cow::Owned(
2197+
bytes_literal_vec
2198+
.iter()
2199+
.flat_map(|bytes_literal| bytes_literal.value.to_vec())
2200+
.collect::<Vec<u8>>(),
2201+
),
2202+
}
2203+
}
2204+
}
2205+
21892206
/// An internal representation of [`BytesLiteralValue`].
21902207
#[derive(Clone, Debug, PartialEq)]
21912208
enum BytesLiteralValueInner {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
use ruff_python_ast::comparable::ComparableExpr;
2+
use ruff_python_parser::{parse_expression, ParseError};
3+
4+
#[test]
5+
fn concatenated_strings_compare_equal() -> Result<(), ParseError> {
6+
let split_contents = r#"'a' 'b' r'\n raw'"#;
7+
let value_contents = r#"'ab\\n raw'"#;
8+
9+
let split_parsed = parse_expression(split_contents)?;
10+
let value_parsed = parse_expression(value_contents)?;
11+
12+
let split_compr = ComparableExpr::from(split_parsed.expr());
13+
let value_compr = ComparableExpr::from(value_parsed.expr());
14+
15+
assert_eq!(split_compr, value_compr);
16+
Ok(())
17+
}
18+
19+
#[test]
20+
fn concatenated_bytes_compare_equal() -> Result<(), ParseError> {
21+
let split_contents = r#"b'a' b'b'"#;
22+
let value_contents = r#"b'ab'"#;
23+
24+
let split_parsed = parse_expression(split_contents)?;
25+
let value_parsed = parse_expression(value_contents)?;
26+
27+
let split_compr = ComparableExpr::from(split_parsed.expr());
28+
let value_compr = ComparableExpr::from(value_parsed.expr());
29+
30+
assert_eq!(split_compr, value_compr);
31+
Ok(())
32+
}
33+
34+
#[test]
35+
fn concatenated_fstrings_compare_equal() -> Result<(), ParseError> {
36+
let split_contents = r#"f"{foo!r} this" r"\n raw" f" and {bar!s} that""#;
37+
let value_contents = r#"f"{foo!r} this\\n raw and {bar!s} that""#;
38+
39+
let split_parsed = parse_expression(split_contents)?;
40+
let value_parsed = parse_expression(value_contents)?;
41+
42+
let split_compr = ComparableExpr::from(split_parsed.expr());
43+
let value_compr = ComparableExpr::from(value_parsed.expr());
44+
45+
assert_eq!(split_compr, value_compr);
46+
Ok(())
47+
}

0 commit comments

Comments
 (0)