Skip to content

Commit 24b45c3

Browse files
committed
Auto merge of #114399 - Zalathar:no-renumber, r=jackh726
coverage: Don't bother renumbering expressions on the Rust side The LLVM API that we use to encode coverage mappings already has its own code for removing unused coverage expressions and renumbering the rest. This lets us get rid of our own complex renumbering code, making it easier to change our coverage code in other ways. --- Now that we have tests for coverage mappings (#114843), I've been able to verify that this PR doesn't make the coverage mappings worse, thanks to an explicit simplification step.
2 parents 0fd7ce9 + 041a232 commit 24b45c3

File tree

6 files changed

+258
-316
lines changed

6 files changed

+258
-316
lines changed

compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs

+22-24
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use rustc_middle::mir::coverage::{CounterId, MappedExpressionIndex};
1+
use rustc_middle::mir::coverage::{CounterId, ExpressionId, Operand};
22

33
/// Must match the layout of `LLVMRustCounterKind`.
44
#[derive(Copy, Clone, Debug)]
@@ -30,32 +30,25 @@ pub struct Counter {
3030
}
3131

3232
impl Counter {
33-
/// Constructs a new `Counter` of kind `Zero`. For this `CounterKind`, the
34-
/// `id` is not used.
35-
pub fn zero() -> Self {
36-
Self { kind: CounterKind::Zero, id: 0 }
37-
}
33+
/// A `Counter` of kind `Zero`. For this counter kind, the `id` is not used.
34+
pub(crate) const ZERO: Self = Self { kind: CounterKind::Zero, id: 0 };
3835

3936
/// Constructs a new `Counter` of kind `CounterValueReference`.
4037
pub fn counter_value_reference(counter_id: CounterId) -> Self {
4138
Self { kind: CounterKind::CounterValueReference, id: counter_id.as_u32() }
4239
}
4340

4441
/// Constructs a new `Counter` of kind `Expression`.
45-
pub fn expression(mapped_expression_index: MappedExpressionIndex) -> Self {
46-
Self { kind: CounterKind::Expression, id: mapped_expression_index.into() }
47-
}
48-
49-
/// Returns true if the `Counter` kind is `Zero`.
50-
pub fn is_zero(&self) -> bool {
51-
matches!(self.kind, CounterKind::Zero)
42+
pub(crate) fn expression(expression_id: ExpressionId) -> Self {
43+
Self { kind: CounterKind::Expression, id: expression_id.as_u32() }
5244
}
5345

54-
/// An explicitly-named function to get the ID value, making it more obvious
55-
/// that the stored value is now 0-based.
56-
pub fn zero_based_id(&self) -> u32 {
57-
debug_assert!(!self.is_zero(), "`id` is undefined for CounterKind::Zero");
58-
self.id
46+
pub(crate) fn from_operand(operand: Operand) -> Self {
47+
match operand {
48+
Operand::Zero => Self::ZERO,
49+
Operand::Counter(id) => Self::counter_value_reference(id),
50+
Operand::Expression(id) => Self::expression(id),
51+
}
5952
}
6053
}
6154

@@ -81,6 +74,11 @@ pub struct CounterExpression {
8174
}
8275

8376
impl CounterExpression {
77+
/// The dummy expression `(0 - 0)` has a representation of all zeroes,
78+
/// making it marginally more efficient to initialize than `(0 + 0)`.
79+
pub(crate) const DUMMY: Self =
80+
Self { lhs: Counter::ZERO, kind: ExprKind::Subtract, rhs: Counter::ZERO };
81+
8482
pub fn new(lhs: Counter, kind: ExprKind, rhs: Counter) -> Self {
8583
Self { kind, lhs, rhs }
8684
}
@@ -172,7 +170,7 @@ impl CounterMappingRegion {
172170
) -> Self {
173171
Self {
174172
counter,
175-
false_counter: Counter::zero(),
173+
false_counter: Counter::ZERO,
176174
file_id,
177175
expanded_file_id: 0,
178176
start_line,
@@ -220,8 +218,8 @@ impl CounterMappingRegion {
220218
end_col: u32,
221219
) -> Self {
222220
Self {
223-
counter: Counter::zero(),
224-
false_counter: Counter::zero(),
221+
counter: Counter::ZERO,
222+
false_counter: Counter::ZERO,
225223
file_id,
226224
expanded_file_id,
227225
start_line,
@@ -243,8 +241,8 @@ impl CounterMappingRegion {
243241
end_col: u32,
244242
) -> Self {
245243
Self {
246-
counter: Counter::zero(),
247-
false_counter: Counter::zero(),
244+
counter: Counter::ZERO,
245+
false_counter: Counter::ZERO,
248246
file_id,
249247
expanded_file_id: 0,
250248
start_line,
@@ -268,7 +266,7 @@ impl CounterMappingRegion {
268266
) -> Self {
269267
Self {
270268
counter,
271-
false_counter: Counter::zero(),
269+
false_counter: Counter::ZERO,
272270
file_id,
273271
expanded_file_id: 0,
274272
start_line,
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind};
22

3-
use rustc_index::{IndexSlice, IndexVec};
4-
use rustc_middle::bug;
5-
use rustc_middle::mir::coverage::{
6-
CodeRegion, CounterId, ExpressionId, MappedExpressionIndex, Op, Operand,
7-
};
3+
use rustc_data_structures::fx::FxIndexSet;
4+
use rustc_index::IndexVec;
5+
use rustc_middle::mir::coverage::{CodeRegion, CounterId, ExpressionId, Op, Operand};
86
use rustc_middle::ty::Instance;
97
use rustc_middle::ty::TyCtxt;
108

@@ -128,6 +126,58 @@ impl<'tcx> FunctionCoverage<'tcx> {
128126
self.unreachable_regions.push(region)
129127
}
130128

129+
/// Perform some simplifications to make the final coverage mappings
130+
/// slightly smaller.
131+
///
132+
/// This method mainly exists to preserve the simplifications that were
133+
/// already being performed by the Rust-side expression renumbering, so that
134+
/// the resulting coverage mappings don't get worse.
135+
pub(crate) fn simplify_expressions(&mut self) {
136+
// The set of expressions that either were optimized out entirely, or
137+
// have zero as both of their operands, and will therefore always have
138+
// a value of zero. Other expressions that refer to these as operands
139+
// can have those operands replaced with `Operand::Zero`.
140+
let mut zero_expressions = FxIndexSet::default();
141+
142+
// For each expression, perform simplifications based on lower-numbered
143+
// expressions, and then update the set of always-zero expressions if
144+
// necessary.
145+
// (By construction, expressions can only refer to other expressions
146+
// that have lower IDs, so one simplification pass is sufficient.)
147+
for (id, maybe_expression) in self.expressions.iter_enumerated_mut() {
148+
let Some(expression) = maybe_expression else {
149+
// If an expression is missing, it must have been optimized away,
150+
// so any operand that refers to it can be replaced with zero.
151+
zero_expressions.insert(id);
152+
continue;
153+
};
154+
155+
// If an operand refers to an expression that is always zero, then
156+
// that operand can be replaced with `Operand::Zero`.
157+
let maybe_set_operand_to_zero = |operand: &mut Operand| match &*operand {
158+
Operand::Expression(id) if zero_expressions.contains(id) => {
159+
*operand = Operand::Zero;
160+
}
161+
_ => (),
162+
};
163+
maybe_set_operand_to_zero(&mut expression.lhs);
164+
maybe_set_operand_to_zero(&mut expression.rhs);
165+
166+
// Coverage counter values cannot be negative, so if an expression
167+
// involves subtraction from zero, assume that its RHS must also be zero.
168+
// (Do this after simplifications that could set the LHS to zero.)
169+
if let Expression { lhs: Operand::Zero, op: Op::Subtract, .. } = expression {
170+
expression.rhs = Operand::Zero;
171+
}
172+
173+
// After the above simplifications, if both operands are zero, then
174+
// we know that this expression is always zero too.
175+
if let Expression { lhs: Operand::Zero, rhs: Operand::Zero, .. } = expression {
176+
zero_expressions.insert(id);
177+
}
178+
}
179+
}
180+
131181
/// Return the source hash, generated from the HIR node structure, and used to indicate whether
132182
/// or not the source code structure changed between different compilations.
133183
pub fn source_hash(&self) -> u64 {
@@ -146,8 +196,14 @@ impl<'tcx> FunctionCoverage<'tcx> {
146196
self.instance
147197
);
148198

199+
let counter_expressions = self.counter_expressions();
200+
// Expression IDs are indices into `self.expressions`, and on the LLVM
201+
// side they will be treated as indices into `counter_expressions`, so
202+
// the two vectors should correspond 1:1.
203+
assert_eq!(self.expressions.len(), counter_expressions.len());
204+
149205
let counter_regions = self.counter_regions();
150-
let (counter_expressions, expression_regions) = self.expressions_with_regions();
206+
let expression_regions = self.expression_regions();
151207
let unreachable_regions = self.unreachable_regions();
152208

153209
let counter_regions =
@@ -163,149 +219,53 @@ impl<'tcx> FunctionCoverage<'tcx> {
163219
})
164220
}
165221

166-
fn expressions_with_regions(
167-
&self,
168-
) -> (Vec<CounterExpression>, impl Iterator<Item = (Counter, &CodeRegion)>) {
169-
let mut counter_expressions = Vec::with_capacity(self.expressions.len());
170-
let mut expression_regions = Vec::with_capacity(self.expressions.len());
171-
let mut new_indexes = IndexVec::from_elem_n(None, self.expressions.len());
222+
/// Convert this function's coverage expression data into a form that can be
223+
/// passed through FFI to LLVM.
224+
fn counter_expressions(&self) -> Vec<CounterExpression> {
225+
// We know that LLVM will optimize out any unused expressions before
226+
// producing the final coverage map, so there's no need to do the same
227+
// thing on the Rust side unless we're confident we can do much better.
228+
// (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.)
172229

173-
// This closure converts any `Expression` operand (`lhs` or `rhs` of the `Op::Add` or
174-
// `Op::Subtract` operation) into its native `llvm::coverage::Counter::CounterKind` type
175-
// and value.
176-
//
177-
// Expressions will be returned from this function in a sequential vector (array) of
178-
// `CounterExpression`, so the expression IDs must be mapped from their original,
179-
// potentially sparse set of indexes.
180-
//
181-
// An `Expression` as an operand will have already been encountered as an `Expression` with
182-
// operands, so its new_index will already have been generated (as a 1-up index value).
183-
// (If an `Expression` as an operand does not have a corresponding new_index, it was
184-
// probably optimized out, after the expression was injected into the MIR, so it will
185-
// get a `CounterKind::Zero` instead.)
186-
//
187-
// In other words, an `Expression`s at any given index can include other expressions as
188-
// operands, but expression operands can only come from the subset of expressions having
189-
// `expression_index`s lower than the referencing `Expression`. Therefore, it is
190-
// reasonable to look up the new index of an expression operand while the `new_indexes`
191-
// vector is only complete up to the current `ExpressionIndex`.
192-
type NewIndexes = IndexSlice<ExpressionId, Option<MappedExpressionIndex>>;
193-
let id_to_counter = |new_indexes: &NewIndexes, operand: Operand| match operand {
194-
Operand::Zero => Some(Counter::zero()),
195-
Operand::Counter(id) => Some(Counter::counter_value_reference(id)),
196-
Operand::Expression(id) => {
197-
self.expressions
198-
.get(id)
199-
.expect("expression id is out of range")
200-
.as_ref()
201-
// If an expression was optimized out, assume it would have produced a count
202-
// of zero. This ensures that expressions dependent on optimized-out
203-
// expressions are still valid.
204-
.map_or(Some(Counter::zero()), |_| new_indexes[id].map(Counter::expression))
205-
}
206-
};
207-
208-
for (original_index, expression) in
209-
self.expressions.iter_enumerated().filter_map(|(original_index, entry)| {
210-
// Option::map() will return None to filter out missing expressions. This may happen
211-
// if, for example, a MIR-instrumented expression is removed during an optimization.
212-
entry.as_ref().map(|expression| (original_index, expression))
213-
})
214-
{
215-
let optional_region = &expression.region;
216-
let Expression { lhs, op, rhs, .. } = *expression;
217-
218-
if let Some(Some((lhs_counter, mut rhs_counter))) = id_to_counter(&new_indexes, lhs)
219-
.map(|lhs_counter| {
220-
id_to_counter(&new_indexes, rhs).map(|rhs_counter| (lhs_counter, rhs_counter))
221-
})
222-
{
223-
if lhs_counter.is_zero() && op.is_subtract() {
224-
// The left side of a subtraction was probably optimized out. As an example,
225-
// a branch condition might be evaluated as a constant expression, and the
226-
// branch could be removed, dropping unused counters in the process.
227-
//
228-
// Since counters are unsigned, we must assume the result of the expression
229-
// can be no more and no less than zero. An expression known to evaluate to zero
230-
// does not need to be added to the coverage map.
231-
//
232-
// Coverage test `loops_branches.rs` includes multiple variations of branches
233-
// based on constant conditional (literal `true` or `false`), and demonstrates
234-
// that the expected counts are still correct.
235-
debug!(
236-
"Expression subtracts from zero (assume unreachable): \
237-
original_index={:?}, lhs={:?}, op={:?}, rhs={:?}, region={:?}",
238-
original_index, lhs, op, rhs, optional_region,
239-
);
240-
rhs_counter = Counter::zero();
230+
self.expressions
231+
.iter()
232+
.map(|expression| match expression {
233+
None => {
234+
// This expression ID was allocated, but we never saw the
235+
// actual expression, so it must have been optimized out.
236+
// Replace it with a dummy expression, and let LLVM take
237+
// care of omitting it from the expression list.
238+
CounterExpression::DUMMY
241239
}
242-
debug_assert!(
243-
lhs_counter.is_zero()
244-
// Note: with `as usize` the ID _could_ overflow/wrap if `usize = u16`
245-
|| ((lhs_counter.zero_based_id() as usize)
246-
<= usize::max(self.counters.len(), self.expressions.len())),
247-
"lhs id={} > both counters.len()={} and expressions.len()={}
248-
({:?} {:?} {:?})",
249-
lhs_counter.zero_based_id(),
250-
self.counters.len(),
251-
self.expressions.len(),
252-
lhs_counter,
253-
op,
254-
rhs_counter,
255-
);
256-
257-
debug_assert!(
258-
rhs_counter.is_zero()
259-
// Note: with `as usize` the ID _could_ overflow/wrap if `usize = u16`
260-
|| ((rhs_counter.zero_based_id() as usize)
261-
<= usize::max(self.counters.len(), self.expressions.len())),
262-
"rhs id={} > both counters.len()={} and expressions.len()={}
263-
({:?} {:?} {:?})",
264-
rhs_counter.zero_based_id(),
265-
self.counters.len(),
266-
self.expressions.len(),
267-
lhs_counter,
268-
op,
269-
rhs_counter,
270-
);
271-
272-
// Both operands exist. `Expression` operands exist in `self.expressions` and have
273-
// been assigned a `new_index`.
274-
let mapped_expression_index =
275-
MappedExpressionIndex::from(counter_expressions.len());
276-
let expression = CounterExpression::new(
277-
lhs_counter,
278-
match op {
279-
Op::Add => ExprKind::Add,
280-
Op::Subtract => ExprKind::Subtract,
281-
},
282-
rhs_counter,
283-
);
284-
debug!(
285-
"Adding expression {:?} = {:?}, region: {:?}",
286-
mapped_expression_index, expression, optional_region
287-
);
288-
counter_expressions.push(expression);
289-
new_indexes[original_index] = Some(mapped_expression_index);
290-
if let Some(region) = optional_region {
291-
expression_regions.push((Counter::expression(mapped_expression_index), region));
240+
&Some(Expression { lhs, op, rhs, .. }) => {
241+
// Convert the operands and operator as normal.
242+
CounterExpression::new(
243+
Counter::from_operand(lhs),
244+
match op {
245+
Op::Add => ExprKind::Add,
246+
Op::Subtract => ExprKind::Subtract,
247+
},
248+
Counter::from_operand(rhs),
249+
)
292250
}
293-
} else {
294-
bug!(
295-
"expression has one or more missing operands \
296-
original_index={:?}, lhs={:?}, op={:?}, rhs={:?}, region={:?}",
297-
original_index,
298-
lhs,
299-
op,
300-
rhs,
301-
optional_region,
302-
);
303-
}
304-
}
305-
(counter_expressions, expression_regions.into_iter())
251+
})
252+
.collect::<Vec<_>>()
253+
}
254+
255+
fn expression_regions(&self) -> Vec<(Counter, &CodeRegion)> {
256+
// Find all of the expression IDs that weren't optimized out AND have
257+
// an attached code region, and return the corresponding mapping as a
258+
// counter/region pair.
259+
self.expressions
260+
.iter_enumerated()
261+
.filter_map(|(id, expression)| {
262+
let code_region = expression.as_ref()?.region.as_ref()?;
263+
Some((Counter::expression(id), code_region))
264+
})
265+
.collect::<Vec<_>>()
306266
}
307267

308268
fn unreachable_regions(&self) -> impl Iterator<Item = (Counter, &CodeRegion)> {
309-
self.unreachable_regions.iter().map(|region| (Counter::zero(), region))
269+
self.unreachable_regions.iter().map(|region| (Counter::ZERO, region))
310270
}
311271
}

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,11 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
6060

6161
// Encode coverage mappings and generate function records
6262
let mut function_data = Vec::new();
63-
for (instance, function_coverage) in function_coverage_map {
63+
for (instance, mut function_coverage) in function_coverage_map {
6464
debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance);
65+
function_coverage.simplify_expressions();
66+
let function_coverage = function_coverage;
67+
6568
let mangled_function_name = tcx.symbol_name(instance).name;
6669
let source_hash = function_coverage.source_hash();
6770
let is_used = function_coverage.is_used();

0 commit comments

Comments
 (0)