Skip to content

coverage: Avoid splitting spans during span extraction/refinement #139102

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler/rustc_mir_transform/src/coverage/mappings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ pub(super) fn extract_all_mapping_info_from_mir<'tcx>(
}
} else {
// Extract coverage spans from MIR statements/terminators as normal.
extract_refined_covspans(mir_body, hir_info, graph, &mut code_mappings);
extract_refined_covspans(tcx, mir_body, hir_info, graph, &mut code_mappings);
}

branch_pairs.extend(extract_branch_pairs(mir_body, hir_info, graph));
Expand Down
107 changes: 30 additions & 77 deletions compiler/rustc_mir_transform/src/coverage/spans.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::collections::VecDeque;
use std::iter;

use rustc_data_structures::fx::FxHashSet;
use rustc_middle::mir;
use rustc_middle::ty::TyCtxt;
use rustc_span::{DesugaringKind, ExpnKind, MacroKind, Span};
use tracing::{debug, debug_span, instrument};

Expand All @@ -11,8 +13,9 @@ use crate::coverage::{ExtractedHirInfo, mappings, unexpand};

mod from_mir;

pub(super) fn extract_refined_covspans(
mir_body: &mir::Body<'_>,
pub(super) fn extract_refined_covspans<'tcx>(
tcx: TyCtxt<'tcx>,
mir_body: &mir::Body<'tcx>,
hir_info: &ExtractedHirInfo,
graph: &CoverageGraph,
code_mappings: &mut impl Extend<mappings::CodeMapping>,
Expand Down Expand Up @@ -50,7 +53,7 @@ pub(super) fn extract_refined_covspans(
// First, perform the passes that need macro information.
covspans.sort_by(|a, b| graph.cmp_in_dominator_order(a.bcb, b.bcb));
remove_unwanted_expansion_spans(&mut covspans);
split_visible_macro_spans(&mut covspans);
shrink_visible_macro_spans(tcx, &mut covspans);

// We no longer need the extra information in `SpanFromMir`, so convert to `Covspan`.
let mut covspans = covspans.into_iter().map(SpanFromMir::into_covspan).collect::<Vec<_>>();
Expand Down Expand Up @@ -83,9 +86,7 @@ pub(super) fn extract_refined_covspans(
// Split the covspans into separate buckets that don't overlap any holes.
let buckets = divide_spans_into_buckets(covspans, &holes);

for mut covspans in buckets {
// Make sure each individual bucket is internally sorted.
covspans.sort_by(compare_covspans);
for covspans in buckets {
let _span = debug_span!("processing bucket", ?covspans).entered();

let mut covspans = remove_unwanted_overlapping_spans(covspans);
Expand Down Expand Up @@ -129,82 +130,50 @@ fn remove_unwanted_expansion_spans(covspans: &mut Vec<SpanFromMir>) {
}

/// When a span corresponds to a macro invocation that is visible from the
/// function body, split it into two parts. The first part covers just the
/// macro name plus `!`, and the second part covers the rest of the macro
/// invocation. This seems to give better results for code that uses macros.
fn split_visible_macro_spans(covspans: &mut Vec<SpanFromMir>) {
let mut extra_spans = vec![];

covspans.retain(|covspan| {
let Some(ExpnKind::Macro(MacroKind::Bang, visible_macro)) = covspan.expn_kind else {
return true;
};

let split_len = visible_macro.as_str().len() as u32 + 1;
let (before, after) = covspan.span.split_at(split_len);
if !covspan.span.contains(before) || !covspan.span.contains(after) {
// Something is unexpectedly wrong with the split point.
// The debug assertion in `split_at` will have already caught this,
// but in release builds it's safer to do nothing and maybe get a
// bug report for unexpected coverage, rather than risk an ICE.
return true;
/// function body, truncate it to just the macro name plus `!`.
/// This seems to give better results for code that uses macros.
fn shrink_visible_macro_spans(tcx: TyCtxt<'_>, covspans: &mut Vec<SpanFromMir>) {
let source_map = tcx.sess.source_map();

for covspan in covspans {
if matches!(covspan.expn_kind, Some(ExpnKind::Macro(MacroKind::Bang, _))) {
covspan.span = source_map.span_through_char(covspan.span, '!');
}

extra_spans.push(SpanFromMir::new(before, covspan.expn_kind.clone(), covspan.bcb));
extra_spans.push(SpanFromMir::new(after, covspan.expn_kind.clone(), covspan.bcb));
false // Discard the original covspan that we just split.
});

// The newly-split spans are added at the end, so any previous sorting
// is not preserved.
covspans.extend(extra_spans);
}
}

/// Uses the holes to divide the given covspans into buckets, such that:
/// - No span in any hole overlaps a bucket (truncating the spans if necessary).
/// - No span in any hole overlaps a bucket (discarding spans if necessary).
/// - The spans in each bucket are strictly after all spans in previous buckets,
/// and strictly before all spans in subsequent buckets.
///
/// The resulting buckets are sorted relative to each other, but might not be
/// internally sorted.
/// The lists of covspans and holes must be sorted.
/// The resulting buckets are sorted relative to each other, and each bucket's
/// contents are sorted.
#[instrument(level = "debug")]
fn divide_spans_into_buckets(input_covspans: Vec<Covspan>, holes: &[Hole]) -> Vec<Vec<Covspan>> {
debug_assert!(input_covspans.is_sorted_by(|a, b| compare_spans(a.span, b.span).is_le()));
debug_assert!(holes.is_sorted_by(|a, b| compare_spans(a.span, b.span).is_le()));

// Now we're ready to start carving holes out of the initial coverage spans,
// and grouping them in buckets separated by the holes.
// Now we're ready to start grouping spans into buckets separated by holes.

let mut input_covspans = VecDeque::from(input_covspans);
let mut fragments = vec![];

// For each hole:
// - Identify the spans that are entirely or partly before the hole.
// - Put those spans in a corresponding bucket, truncated to the start of the hole.
// - If one of those spans also extends after the hole, put the rest of it
// in a "fragments" vector that is processed by the next hole.
// - Discard any that overlap with the hole.
// - Add the remaining identified spans to the corresponding bucket.
let mut buckets = (0..holes.len()).map(|_| vec![]).collect::<Vec<_>>();
for (hole, bucket) in holes.iter().zip(&mut buckets) {
let fragments_from_prev = std::mem::take(&mut fragments);

// Only inspect spans that precede or overlap this hole,
// leaving the rest to be inspected by later holes.
// (This relies on the spans and holes both being sorted.)
let relevant_input_covspans =
drain_front_while(&mut input_covspans, |c| c.span.lo() < hole.span.hi());

for covspan in fragments_from_prev.into_iter().chain(relevant_input_covspans) {
let (before, after) = covspan.split_around_hole_span(hole.span);
bucket.extend(before);
fragments.extend(after);
}
bucket.extend(
drain_front_while(&mut input_covspans, |c| c.span.lo() < hole.span.hi())
.filter(|c| !c.span.overlaps(hole.span)),
);
}

// After finding the spans before each hole, any remaining fragments/spans
// form their own final bucket, after the final hole.
// Any remaining spans form their own final bucket, after the final hole.
// (If there were no holes, this will just be all of the initial spans.)
fragments.extend(input_covspans);
buckets.push(fragments);
buckets.push(Vec::from(input_covspans));

buckets
}
Expand All @@ -215,7 +184,7 @@ fn drain_front_while<'a, T>(
queue: &'a mut VecDeque<T>,
mut pred_fn: impl FnMut(&T) -> bool,
) -> impl Iterator<Item = T> {
std::iter::from_fn(move || if pred_fn(queue.front()?) { queue.pop_front() } else { None })
iter::from_fn(move || queue.pop_front_if(|x| pred_fn(x)))
}

/// Takes one of the buckets of (sorted) spans extracted from MIR, and "refines"
Expand Down Expand Up @@ -258,22 +227,6 @@ struct Covspan {
}

impl Covspan {
/// Splits this covspan into 0-2 parts:
/// - The part that is strictly before the hole span, if any.
/// - The part that is strictly after the hole span, if any.
fn split_around_hole_span(&self, hole_span: Span) -> (Option<Self>, Option<Self>) {
let before = try {
let span = self.span.trim_end(hole_span)?;
Self { span, ..*self }
};
let after = try {
let span = self.span.trim_start(hole_span)?;
Self { span, ..*self }
};

(before, after)
}

/// If `self` and `other` can be merged (i.e. they have the same BCB),
/// mutates `self.span` to also include `other.span` and returns true.
///
Expand Down
18 changes: 7 additions & 11 deletions compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,22 +120,20 @@ fn filtered_terminator_span(terminator: &Terminator<'_>) -> Option<Span> {
// an `if condition { block }` has a span that includes the executed block, if true,
// but for coverage, the code region executed, up to *and* through the SwitchInt,
// actually stops before the if's block.)
TerminatorKind::Unreachable // Unreachable blocks are not connected to the MIR CFG
TerminatorKind::Unreachable
| TerminatorKind::Assert { .. }
| TerminatorKind::Drop { .. }
| TerminatorKind::SwitchInt { .. }
// For `FalseEdge`, only the `real` branch is taken, so it is similar to a `Goto`.
| TerminatorKind::FalseEdge { .. }
| TerminatorKind::Goto { .. } => None,

// Call `func` operand can have a more specific span when part of a chain of calls
TerminatorKind::Call { ref func, .. }
| TerminatorKind::TailCall { ref func, .. } => {
TerminatorKind::Call { ref func, .. } | TerminatorKind::TailCall { ref func, .. } => {
let mut span = terminator.source_info.span;
if let mir::Operand::Constant(box constant) = func {
if constant.span.lo() > span.lo() {
span = span.with_lo(constant.span.lo());
}
if let mir::Operand::Constant(constant) = func
&& span.contains(constant.span)
{
span = constant.span;
}
Comment on lines -135 to 137
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this to method calls?

And wouldn't you also make this work in case of let x = func; x(foo); to point at the x part of the function call?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original version of this heuristic predates me, but I think the intention was to deal with chained calls like this:

    data.foo().bar().baz(args);
//  ^^^^^^^^^^^^^^^^^^^^^^^^^^ (original span of the MIR call to baz)
//                   ^^^^^^^^^ (modified call span before this PR)
//                   ^^^       (modified call span after this PR)

In practice, a lot of these small differences are hard to observe in the output, because there's a subsequent step that merges nearby spans that are considered to have the same control-flow for coverage purposes.

(This merging even happens for non-adjacent/overlapping spans in some cases, which is a behaviour I would eventually like to get rid of.)

Copy link
Contributor Author

@Zalathar Zalathar Apr 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, I don't know the original reason for restricting this heuristic to “constant” function/method names, but I suspect it comes down to the fact that evaluating a non-constant callee can potentially have its own internal control flow, which can potentially result in confusing spans.

That said, such code is (a) relatively uncommon in practice, and (b) will probably cause the current span-refinement code to just discard the whole call span.

So I guess my reason for not changing it in this PR is to mostly just to avoid changes beyond my intended scope.

Some(span)
}
Expand All @@ -147,9 +145,7 @@ fn filtered_terminator_span(terminator: &Terminator<'_>) -> Option<Span> {
| TerminatorKind::Yield { .. }
| TerminatorKind::CoroutineDrop
| TerminatorKind::FalseUnwind { .. }
| TerminatorKind::InlineAsm { .. } => {
Some(terminator.source_info.span)
}
| TerminatorKind::InlineAsm { .. } => Some(terminator.source_info.span),
}
}

Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_mir_transform/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#![feature(map_try_insert)]
#![feature(never_type)]
#![feature(try_blocks)]
#![feature(vec_deque_pop_if)]
#![feature(yeet_expr)]
// tidy-alphabetical-end

Expand Down
4 changes: 2 additions & 2 deletions tests/coverage/abort.cov-map
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ Number of file 0 mappings: 13
Highest counter ID seen: c4

Function name: abort::might_abort
Raw bytes (21): 0x[01, 01, 01, 01, 05, 03, 01, 03, 01, 01, 14, 05, 02, 09, 01, 24, 02, 02, 0c, 03, 02]
Raw bytes (21): 0x[01, 01, 01, 01, 05, 03, 01, 03, 01, 01, 14, 05, 02, 09, 01, 0f, 02, 02, 0c, 03, 02]
Number of files: 1
- file 0 => global file 1
Number of expressions: 1
- expression 0 operands: lhs = Counter(0), rhs = Counter(1)
Number of file 0 mappings: 3
- Code(Counter(0)) at (prev + 3, 1) to (start + 1, 20)
- Code(Counter(1)) at (prev + 2, 9) to (start + 1, 36)
- Code(Counter(1)) at (prev + 2, 9) to (start + 1, 15)
- Code(Expression(0, Sub)) at (prev + 2, 12) to (start + 3, 2)
= (c0 - c1)
Highest counter ID seen: c1
Expand Down
4 changes: 2 additions & 2 deletions tests/coverage/assert-ne.cov-map
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
Function name: assert_ne::main
Raw bytes (28): 0x[01, 01, 02, 01, 05, 01, 09, 04, 01, 08, 01, 03, 1c, 05, 04, 0d, 00, 13, 02, 02, 0d, 00, 13, 06, 03, 05, 01, 02]
Raw bytes (28): 0x[01, 01, 02, 01, 05, 01, 09, 04, 01, 08, 01, 03, 15, 05, 04, 0d, 00, 13, 02, 02, 0d, 00, 13, 06, 03, 05, 01, 02]
Number of files: 1
- file 0 => global file 1
Number of expressions: 2
- expression 0 operands: lhs = Counter(0), rhs = Counter(1)
- expression 1 operands: lhs = Counter(0), rhs = Counter(2)
Number of file 0 mappings: 4
- Code(Counter(0)) at (prev + 8, 1) to (start + 3, 28)
- Code(Counter(0)) at (prev + 8, 1) to (start + 3, 21)
- Code(Counter(1)) at (prev + 4, 13) to (start + 0, 19)
- Code(Expression(0, Sub)) at (prev + 2, 13) to (start + 0, 19)
= (c0 - c1)
Expand Down
2 changes: 1 addition & 1 deletion tests/coverage/assert-ne.coverage
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
LL| |
LL| 1|fn main() {
LL| 1| assert_ne!(
LL| 1| Foo(5), // Make sure this expression's span isn't lost.
LL| 1| black_box(Foo(5)), // Make sure this expression's span isn't lost.
LL| 1| if black_box(false) {
LL| 0| Foo(0) //
LL| | } else {
Expand Down
2 changes: 1 addition & 1 deletion tests/coverage/assert-ne.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ struct Foo(u32);

fn main() {
assert_ne!(
Foo(5), // Make sure this expression's span isn't lost.
black_box(Foo(5)), // Make sure this expression's span isn't lost.
if black_box(false) {
Foo(0) //
} else {
Expand Down
10 changes: 5 additions & 5 deletions tests/coverage/assert_not.cov-map
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
Function name: assert_not::main
Raw bytes (29): 0x[01, 01, 00, 05, 01, 06, 01, 01, 12, 01, 02, 05, 00, 14, 01, 01, 05, 00, 14, 01, 01, 05, 00, 16, 01, 01, 01, 00, 02]
Raw bytes (29): 0x[01, 01, 00, 05, 01, 06, 01, 01, 11, 01, 02, 05, 00, 13, 01, 01, 05, 00, 13, 01, 01, 05, 00, 15, 01, 01, 01, 00, 02]
Number of files: 1
- file 0 => global file 1
Number of expressions: 0
Number of file 0 mappings: 5
- Code(Counter(0)) at (prev + 6, 1) to (start + 1, 18)
- Code(Counter(0)) at (prev + 2, 5) to (start + 0, 20)
- Code(Counter(0)) at (prev + 1, 5) to (start + 0, 20)
- Code(Counter(0)) at (prev + 1, 5) to (start + 0, 22)
- Code(Counter(0)) at (prev + 6, 1) to (start + 1, 17)
- Code(Counter(0)) at (prev + 2, 5) to (start + 0, 19)
- Code(Counter(0)) at (prev + 1, 5) to (start + 0, 19)
- Code(Counter(0)) at (prev + 1, 5) to (start + 0, 21)
- Code(Counter(0)) at (prev + 1, 1) to (start + 0, 2)
Highest counter ID seen: c0

8 changes: 4 additions & 4 deletions tests/coverage/async_block.cov-map
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Function name: async_block::main
Raw bytes (36): 0x[01, 01, 01, 05, 01, 06, 01, 07, 01, 00, 0b, 02, 01, 09, 00, 0a, 05, 00, 0e, 00, 13, 02, 00, 14, 01, 16, 02, 07, 0a, 02, 06, 01, 03, 01, 00, 02]
Raw bytes (36): 0x[01, 01, 01, 05, 01, 06, 01, 07, 01, 00, 0b, 02, 01, 09, 00, 0a, 05, 00, 0e, 00, 13, 02, 01, 0d, 00, 13, 02, 07, 09, 00, 22, 01, 02, 01, 00, 02]
Number of files: 1
- file 0 => global file 1
Number of expressions: 1
Expand All @@ -9,11 +9,11 @@ Number of file 0 mappings: 6
- Code(Expression(0, Sub)) at (prev + 1, 9) to (start + 0, 10)
= (c1 - c0)
- Code(Counter(1)) at (prev + 0, 14) to (start + 0, 19)
- Code(Expression(0, Sub)) at (prev + 0, 20) to (start + 1, 22)
- Code(Expression(0, Sub)) at (prev + 1, 13) to (start + 0, 19)
= (c1 - c0)
- Code(Expression(0, Sub)) at (prev + 7, 10) to (start + 2, 6)
- Code(Expression(0, Sub)) at (prev + 7, 9) to (start + 0, 34)
= (c1 - c0)
- Code(Counter(0)) at (prev + 3, 1) to (start + 0, 2)
- Code(Counter(0)) at (prev + 2, 1) to (start + 0, 2)
Highest counter ID seen: c1

Function name: async_block::main::{closure#0}
Expand Down
2 changes: 1 addition & 1 deletion tests/coverage/async_block.coverage
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@
LL| 12| }
LL| 16| };
LL| 16| executor::block_on(future);
LL| 16| }
LL| | }
LL| 1|}

14 changes: 8 additions & 6 deletions tests/coverage/async_closure.cov-map
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,23 @@ Number of file 0 mappings: 2
Highest counter ID seen: c0

Function name: async_closure::main::{closure#0}
Raw bytes (9): 0x[01, 01, 00, 01, 01, 0b, 23, 00, 24]
Raw bytes (14): 0x[01, 01, 00, 02, 01, 0b, 22, 00, 23, 01, 00, 23, 00, 24]
Number of files: 1
- file 0 => global file 1
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Counter(0)) at (prev + 11, 35) to (start + 0, 36)
Number of file 0 mappings: 2
- Code(Counter(0)) at (prev + 11, 34) to (start + 0, 35)
- Code(Counter(0)) at (prev + 0, 35) to (start + 0, 36)
Highest counter ID seen: c0

Function name: async_closure::main::{closure#0}
Raw bytes (9): 0x[01, 01, 00, 01, 01, 0b, 23, 00, 24]
Raw bytes (14): 0x[01, 01, 00, 02, 01, 0b, 22, 00, 23, 01, 00, 23, 00, 24]
Number of files: 1
- file 0 => global file 1
Number of expressions: 0
Number of file 0 mappings: 1
- Code(Counter(0)) at (prev + 11, 35) to (start + 0, 36)
Number of file 0 mappings: 2
- Code(Counter(0)) at (prev + 11, 34) to (start + 0, 35)
- Code(Counter(0)) at (prev + 0, 35) to (start + 0, 36)
Highest counter ID seen: c0

Function name: async_closure::main::{closure#0}::{closure#0}::<i16>
Expand Down
4 changes: 3 additions & 1 deletion tests/coverage/async_closure.coverage
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@
LL| |
LL| 1|pub fn main() {
LL| 2| let async_closure = async || {};
^1
------------------
| async_closure::main::{closure#0}:
| LL| 1| let async_closure = async || {};
------------------
| async_closure::main::{closure#0}:
| LL| 1| let async_closure = async || {};
------------------
| async_closure::main::{closure#0}::{closure#0}::<i16>:
| LL| 1| let async_closure = async || {};
------------------
LL| 1| executor::block_on(async_closure());
LL| 1| executor::block_on(call_once(async_closure));
Expand Down
Loading
Loading