Skip to content

Commit 0a07a2c

Browse files
authored
Extract string part and normalized string (#7219)
1 parent 47a253f commit 0a07a2c

File tree

1 file changed

+101
-84
lines changed
  • crates/ruff_python_formatter/src/expression

1 file changed

+101
-84
lines changed

crates/ruff_python_formatter/src/expression/string.rs

+101-84
Original file line numberDiff line numberDiff line change
@@ -138,29 +138,26 @@ impl<'a> FormatString<'a> {
138138

139139
impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
140140
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
141+
let locator = f.context().locator();
141142
match self.layout {
142143
StringLayout::Default => {
143144
if self.string.is_implicit_concatenated() {
144145
in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
145146
} else {
146-
FormatStringPart::new(
147-
self.string.range(),
148-
self.string.quoting(&f.context().locator()),
149-
&f.context().locator(),
150-
f.options().quote_style(),
151-
)
152-
.fmt(f)
147+
StringPart::from_source(self.string.range(), &locator)
148+
.normalize(
149+
self.string.quoting(&locator),
150+
&locator,
151+
f.options().quote_style(),
152+
)
153+
.fmt(f)
153154
}
154155
}
155156
StringLayout::DocString => {
156-
let string_part = FormatStringPart::new(
157-
self.string.range(),
158-
// f-strings can't be docstrings
159-
Quoting::CanChange,
160-
&f.context().locator(),
161-
f.options().quote_style(),
162-
);
163-
format_docstring(&string_part, f)
157+
let string_part = StringPart::from_source(self.string.range(), &locator);
158+
let normalized =
159+
string_part.normalize(Quoting::CanChange, &locator, f.options().quote_style());
160+
format_docstring(&normalized, f)
164161
}
165162
StringLayout::ImplicitConcatenatedStringInBinaryLike => {
166163
FormatStringContinuation::new(self.string).fmt(f)
@@ -259,16 +256,14 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
259256
});
260257

261258
let (trailing_part_comments, rest) = rest.split_at(trailing_comments_end);
259+
let part = StringPart::from_source(token_range, &locator);
260+
let normalized =
261+
part.normalize(self.string.quoting(&locator), &locator, quote_style);
262262

263263
joiner.entry(&format_args![
264264
line_suffix_boundary(),
265265
leading_comments(leading_part_comments),
266-
FormatStringPart::new(
267-
token_range,
268-
self.string.quoting(&locator),
269-
&locator,
270-
quote_style,
271-
),
266+
normalized,
272267
trailing_comments(trailing_part_comments)
273268
]);
274269

@@ -289,21 +284,20 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
289284
}
290285
}
291286

292-
struct FormatStringPart {
287+
#[derive(Debug)]
288+
struct StringPart {
289+
/// The prefix.
293290
prefix: StringPrefix,
294-
preferred_quotes: StringQuotes,
295-
range: TextRange,
296-
is_raw_string: bool,
297-
}
298291

299-
impl Ranged for FormatStringPart {
300-
fn range(&self) -> TextRange {
301-
self.range
302-
}
292+
/// The actual quotes of the string in the source
293+
quotes: StringQuotes,
294+
295+
/// The range of the string's content (full range minus quotes and prefix)
296+
content_range: TextRange,
303297
}
304298

305-
impl FormatStringPart {
306-
fn new(range: TextRange, quoting: Quoting, locator: &Locator, quote_style: QuoteStyle) -> Self {
299+
impl StringPart {
300+
fn from_source(range: TextRange, locator: &Locator) -> Self {
307301
let string_content = locator.slice(range);
308302

309303
let prefix = StringPrefix::parse(string_content);
@@ -317,46 +311,80 @@ impl FormatStringPart {
317311
);
318312
let raw_content_range = relative_raw_content_range + range.start();
319313

320-
let raw_content = &string_content[relative_raw_content_range];
321-
let is_raw_string = prefix.is_raw_string();
314+
Self {
315+
prefix,
316+
content_range: raw_content_range,
317+
quotes,
318+
}
319+
}
320+
321+
/// Computes the string's preferred quotes and normalizes its content.
322+
fn normalize<'a>(
323+
self,
324+
quoting: Quoting,
325+
locator: &'a Locator,
326+
quote_style: QuoteStyle,
327+
) -> NormalizedString<'a> {
328+
let raw_content = locator.slice(self.content_range);
329+
322330
let preferred_quotes = match quoting {
323-
Quoting::Preserve => quotes,
331+
Quoting::Preserve => self.quotes,
324332
Quoting::CanChange => {
325-
if is_raw_string {
326-
preferred_quotes_raw(raw_content, quotes, quote_style)
333+
if self.prefix.is_raw_string() {
334+
preferred_quotes_raw(raw_content, self.quotes, quote_style)
327335
} else {
328-
preferred_quotes(raw_content, quotes, quote_style)
336+
preferred_quotes(raw_content, self.quotes, quote_style)
329337
}
330338
}
331339
};
332340

333-
Self {
334-
prefix,
335-
range: raw_content_range,
341+
let normalized = normalize_string(
342+
locator.slice(self.content_range),
336343
preferred_quotes,
337-
is_raw_string,
344+
self.prefix.is_raw_string(),
345+
);
346+
347+
NormalizedString {
348+
prefix: self.prefix,
349+
content_range: self.content_range,
350+
text: normalized,
351+
quotes: preferred_quotes,
338352
}
339353
}
340354
}
341355

342-
impl Format<PyFormatContext<'_>> for FormatStringPart {
343-
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
344-
let normalized = normalize_string(
345-
f.context().locator().slice(self.range),
346-
self.preferred_quotes,
347-
self.is_raw_string,
348-
);
356+
#[derive(Debug)]
357+
struct NormalizedString<'a> {
358+
prefix: StringPrefix,
359+
360+
/// The quotes of the normalized string (preferred quotes)
361+
quotes: StringQuotes,
349362

350-
write!(f, [self.prefix, self.preferred_quotes])?;
351-
match normalized {
363+
/// The range of the string's content in the source (minus prefix and quotes).
364+
content_range: TextRange,
365+
366+
/// The normalized text
367+
text: Cow<'a, str>,
368+
}
369+
370+
impl Ranged for NormalizedString<'_> {
371+
fn range(&self) -> TextRange {
372+
self.content_range
373+
}
374+
}
375+
376+
impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
377+
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
378+
write!(f, [self.prefix, self.quotes])?;
379+
match &self.text {
352380
Cow::Borrowed(_) => {
353381
source_text_slice(self.range()).fmt(f)?;
354382
}
355383
Cow::Owned(normalized) => {
356-
text(&normalized, Some(self.start())).fmt(f)?;
384+
text(normalized, Some(self.start())).fmt(f)?;
357385
}
358386
}
359-
self.preferred_quotes.fmt(f)
387+
self.quotes.fmt(f)
360388
}
361389
}
362390

@@ -802,35 +830,30 @@ fn count_indentation_like_black(line: &str, tab_width: TabWidth) -> TextSize {
802830
/// line c
803831
/// """
804832
/// ```
805-
fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> FormatResult<()> {
806-
let locator = f.context().locator();
833+
fn format_docstring(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
834+
let docstring = &normalized.text;
807835

808836
// Black doesn't change the indentation of docstrings that contain an escaped newline
809-
if locator.slice(string_part).contains("\\\n") {
810-
return string_part.fmt(f);
837+
if docstring.contains("\\\n") {
838+
return normalized.fmt(f);
811839
}
812840

813-
let normalized = normalize_string(
814-
locator.slice(string_part),
815-
string_part.preferred_quotes,
816-
string_part.is_raw_string,
817-
);
818841
// is_borrowed is unstable :/
819-
let already_normalized = matches!(normalized, Cow::Borrowed(_));
842+
let already_normalized = matches!(docstring, Cow::Borrowed(_));
820843

821-
let mut lines = normalized.lines().peekable();
844+
let mut lines = docstring.lines().peekable();
822845

823846
// Start the string
824847
write!(
825848
f,
826849
[
827-
source_position(string_part.start()),
828-
string_part.prefix,
829-
string_part.preferred_quotes
850+
normalized.prefix,
851+
normalized.quotes,
852+
source_position(normalized.start()),
830853
]
831854
)?;
832855
// We track where in the source docstring we are (in source code byte offsets)
833-
let mut offset = string_part.start();
856+
let mut offset = normalized.start();
834857

835858
// The first line directly after the opening quotes has different rules than the rest, mainly
836859
// that we remove all leading whitespace as there's no indentation
@@ -844,7 +867,7 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form
844867

845868
// Edge case: The first line is `""" "content`, so we need to insert a chaperone space that keeps
846869
// inner quotes and closing quotes from getting too close, to avoid `""""content`
847-
if trim_both.starts_with(string_part.preferred_quotes.style.as_char()) {
870+
if trim_both.starts_with(normalized.quotes.style.as_char()) {
848871
space().fmt(f)?;
849872
}
850873

@@ -863,15 +886,15 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form
863886
offset += first.text_len();
864887

865888
// Check if we have a single line (or empty) docstring
866-
if normalized[first.len()..].trim().is_empty() {
889+
if docstring[first.len()..].trim().is_empty() {
867890
// For `"""\n"""` or other whitespace between the quotes, black keeps a single whitespace,
868891
// but `""""""` doesn't get one inserted.
869-
if needs_chaperone_space(string_part, trim_end)
870-
|| (trim_end.is_empty() && !normalized.is_empty())
892+
if needs_chaperone_space(normalized, trim_end)
893+
|| (trim_end.is_empty() && !docstring.is_empty())
871894
{
872895
space().fmt(f)?;
873896
}
874-
string_part.preferred_quotes.fmt(f)?;
897+
normalized.quotes.fmt(f)?;
875898
return Ok(());
876899
}
877900

@@ -906,27 +929,21 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form
906929
}
907930

908931
// Same special case in the last line as for the first line
909-
let trim_end = normalized
932+
let trim_end = docstring
910933
.as_ref()
911934
.trim_end_matches(|c: char| c.is_whitespace() && c != '\n');
912-
if needs_chaperone_space(string_part, trim_end) {
935+
if needs_chaperone_space(normalized, trim_end) {
913936
space().fmt(f)?;
914937
}
915938

916-
write!(
917-
f,
918-
[
919-
string_part.preferred_quotes,
920-
source_position(string_part.end())
921-
]
922-
)
939+
write!(f, [source_position(normalized.end()), normalized.quotes])
923940
}
924941

925942
/// If the last line of the docstring is `content" """` or `content\ """`, we need a chaperone space
926943
/// that avoids `content""""` and `content\"""`. This only applies to un-escaped backslashes,
927944
/// so `content\\ """` doesn't need a space while `content\\\ """` does.
928-
fn needs_chaperone_space(string_part: &FormatStringPart, trim_end: &str) -> bool {
929-
trim_end.ends_with(string_part.preferred_quotes.style.as_char())
945+
fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool {
946+
trim_end.ends_with(normalized.quotes.style.as_char())
930947
|| trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
931948
}
932949

0 commit comments

Comments
 (0)