Rollup merge of rust-lang#137204 - nnethercote:clarify-MIR-dialects-and-phases, r=RalfJung

matthiaskrgr · web-flow · commit 15a0403ecf3d · 2025-02-21T12:45:23.000+01:00
Clarify MIR dialects and phases

I found the existing code and docs hard to understand.

r? `@Zalathar`
diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs
@@ -98,20 +98,13 @@ impl<'tcx> HasLocalDecls<'tcx> for Body<'tcx> {
 }
 
 impl MirPhase {
-    /// Gets the index of the current MirPhase within the set of all `MirPhase`s.
-    ///
-    /// FIXME(JakobDegen): Return a `(usize, usize)` instead.
-    pub fn phase_index(&self) -> usize {
-        const BUILT_PHASE_COUNT: usize = 1;
-        const ANALYSIS_PHASE_COUNT: usize = 2;
-        match self {
-            MirPhase::Built => 1,
-            MirPhase::Analysis(analysis_phase) => {
-                1 + BUILT_PHASE_COUNT + (*analysis_phase as usize)
-            }
-            MirPhase::Runtime(runtime_phase) => {
-                1 + BUILT_PHASE_COUNT + ANALYSIS_PHASE_COUNT + (*runtime_phase as usize)
-            }
+    /// Gets the (dialect, phase) index of the current `MirPhase`. Both numbers
+    /// are 1-indexed.
+    pub fn index(&self) -> (usize, usize) {
+        match *self {
+            MirPhase::Built => (1, 1),
+            MirPhase::Analysis(analysis_phase) => (2, 1 + analysis_phase as usize),
+            MirPhase::Runtime(runtime_phase) => (3, 1 + runtime_phase as usize),
         }
     }
 
diff --git a/compiler/rustc_middle/src/mir/pretty.rs b/compiler/rustc_middle/src/mir/pretty.rs
@@ -231,7 +231,8 @@ fn dump_path<'tcx>(
     let pass_num = if tcx.sess.opts.unstable_opts.dump_mir_exclude_pass_number {
         String::new()
     } else if pass_num {
-        format!(".{:03}-{:03}", body.phase.phase_index(), body.pass_count)
+        let (dialect_index, phase_index) = body.phase.index();
+        format!(".{}-{}-{:03}", dialect_index, phase_index, body.pass_count)
     } else {
         ".-------".to_string()
     };
diff --git a/compiler/rustc_middle/src/mir/syntax.rs b/compiler/rustc_middle/src/mir/syntax.rs
@@ -23,68 +23,77 @@ use crate::ty::{self, GenericArgsRef, List, Region, Ty, UserTypeAnnotationIndex}
 
 /// Represents the "flavors" of MIR.
 ///
-/// All flavors of MIR use the same data structure, but there are some important differences. These
-/// differences come in two forms: Dialects and phases.
+/// The MIR pipeline is structured into a few major dialects, with one or more phases within each
+/// dialect. A MIR flavor is identified by a dialect-phase pair. A single `MirPhase` value
+/// specifies such a pair. All flavors of MIR use the same data structure to represent the program.
 ///
-/// Dialects represent a stronger distinction than phases. This is because the transitions between
-/// dialects are semantic changes, and therefore technically *lowerings* between distinct IRs. In
-/// other words, the same [`Body`](crate::mir::Body) might be well-formed for multiple dialects, but
-/// have different semantic meaning and different behavior at runtime.
+/// Different MIR dialects have different semantics. (The differences between dialects are small,
+/// but they do exist.) The progression from one MIR dialect to the next is technically a lowering
+/// from one IR to another. In other words, a single well-formed [`Body`](crate::mir::Body) might
+/// have different semantic meaning and different behavior at runtime in the different dialects.
+/// The specific differences between dialects are described on the variants below.
 ///
-/// Each dialect additionally has a number of phases. However, phase changes never involve semantic
-/// changes. If some MIR is well-formed both before and after a phase change, it is also guaranteed
-/// that it has the same semantic meaning. In this sense, phase changes can only add additional
-/// restrictions on what MIR is well-formed.
+/// Phases exist only to place restrictions on what language constructs are permitted in
+/// well-formed MIR, and subsequent phases mostly increase those restrictions. I.e. converting MIR
+/// from one phase to the next might require removing/replacing certain MIR constructs.
 ///
-/// When adding phases, remember to update [`MirPhase::phase_index`].
+/// When adding dialects or phases, remember to update [`MirPhase::index`].
 #[derive(Copy, Clone, TyEncodable, TyDecodable, Debug, PartialEq, Eq, PartialOrd, Ord)]
 #[derive(HashStable)]
 pub enum MirPhase {
-    /// The MIR that is generated by MIR building.
+    /// The "built MIR" dialect, as generated by MIR building.
     ///
     /// The only things that operate on this dialect are unsafeck, the various MIR lints, and const
     /// qualifs.
     ///
-    /// This has no distinct phases.
+    /// This dialect has just the one (implicit) phase, which places few restrictions on what MIR
+    /// constructs are allowed.
     Built,
-    /// The MIR used for most analysis.
+
+    /// The "analysis MIR" dialect, used for borrowck and friends.
     ///
-    /// The only semantic change between analysis and built MIR is constant promotion. In built MIR,
-    /// sequences of statements that would generally be subject to constant promotion are
-    /// semantically constants, while in analysis MIR all constants are explicit.
+    /// The only semantic difference between built MIR and analysis MIR relates to constant
+    /// promotion. In built MIR, sequences of statements that would generally be subject to
+    /// constant promotion are semantically constants, while in analysis MIR all constants are
+    /// explicit.
     ///
-    /// The result of const promotion is available from the `mir_promoted` and `promoted_mir` queries.
+    /// The result of const promotion is available from the `mir_promoted` and `promoted_mir`
+    /// queries.
     ///
-    /// This is the version of MIR used by borrowck and friends.
+    /// The phases of this dialect are described in `AnalysisPhase`.
     Analysis(AnalysisPhase),
-    /// The MIR used for CTFE, optimizations, and codegen.
-    ///
-    /// The semantic changes that occur in the lowering from analysis to runtime MIR are as follows:
-    ///
-    ///  - Drops: In analysis MIR, `Drop` terminators represent *conditional* drops; roughly speaking,
-    ///    if dataflow analysis determines that the place being dropped is uninitialized, the drop will
-    ///    not be executed. The exact semantics of this aren't written down anywhere, which means they
-    ///    are essentially "what drop elaboration does." In runtime MIR, the drops are unconditional;
-    ///    when a `Drop` terminator is reached, if the type has drop glue that drop glue is always
-    ///    executed. This may be UB if the underlying place is not initialized.
-    ///  - Packed drops: Places might in general be misaligned - in most cases this is UB, the exception
-    ///    is fields of packed structs. In analysis MIR, `Drop(P)` for a `P` that might be misaligned
-    ///    for this reason implicitly moves `P` to a temporary before dropping. Runtime MIR has no such
-    ///    rules, and dropping a misaligned place is simply UB.
-    ///  - Unwinding: in analysis MIR, unwinding from a function which may not unwind aborts. In runtime
-    ///    MIR, this is UB.
-    ///  - Retags: If `-Zmir-emit-retag` is enabled, analysis MIR has "implicit" retags in the same way
-    ///    that Rust itself has them. Where exactly these are is generally subject to change, and so we
-    ///    don't document this here. Runtime MIR has most retags explicit (though implicit retags
-    ///    can still occur at `Rvalue::{Ref,AddrOf}`).
-    ///  - Coroutine bodies: In analysis MIR, locals may actually be behind a pointer that user code has
-    ///    access to. This occurs in coroutine bodies. Such locals do not behave like other locals,
-    ///    because they eg may be aliased in surprising ways. Runtime MIR has no such special locals -
-    ///    all coroutine bodies are lowered and so all places that look like locals really are locals.
+
+    /// The "runtime MIR" dialect, used for CTFE, optimizations, and codegen.
+    ///
+    /// The semantic differences between analysis MIR and runtime MIR are as follows.
+    ///
+    /// - Drops: In analysis MIR, `Drop` terminators represent *conditional* drops; roughly
+    ///   speaking, if dataflow analysis determines that the place being dropped is uninitialized,
+    ///   the drop will not be executed. The exact semantics of this aren't written down anywhere,
+    ///   which means they are essentially "what drop elaboration does." In runtime MIR, the drops
+    ///   are unconditional; when a `Drop` terminator is reached, if the type has drop glue, that
+    ///   drop glue is always executed. This may be UB if the underlying place is not initialized.
+    /// - Packed drops: Places might in general be misaligned - in most cases this is UB; the
+    ///   exception is fields of packed structs. In analysis MIR, `Drop(P)` for a `P` that might be
+    ///   misaligned for this reason implicitly moves `P` to a temporary before dropping. Runtime
+    ///   MIR has no such rules, and dropping a misaligned place is simply UB.
+    /// - Unwinding: in analysis MIR, unwinding from a function which may not unwind aborts. In
+    ///   runtime MIR, this is UB.
+    /// - Retags: If `-Zmir-emit-retag` is enabled, analysis MIR has "implicit" retags in the same
+    ///   way that Rust itself has them. Where exactly these are is generally subject to change,
+    ///   and so we don't document this here. Runtime MIR has most retags explicit (though implicit
+    ///   retags can still occur at `Rvalue::{Ref,AddrOf}`).
+    /// - Coroutine bodies: In analysis MIR, locals may actually be behind a pointer that user code
+    ///   has access to. This occurs in coroutine bodies. Such locals do not behave like other
+    ///   locals, because they e.g. may be aliased in surprising ways. Runtime MIR has no such
+    ///   special locals. All coroutine bodies are lowered and so all places that look like locals
+    ///   really are locals.
     ///
     /// Also note that the lint pass which reports eg `200_u8 + 200_u8` as an error is run as a part
     /// of analysis to runtime MIR lowering. To ensure lints are reported reliably, this means that
-    /// transformations which may suppress such errors should not run on analysis MIR.
+    /// transformations that can suppress such errors should not run on analysis MIR.
+    ///
+    /// The phases of this dialect are described in `RuntimePhase`.
     Runtime(RuntimePhase),
 }
 
@@ -111,7 +120,8 @@ pub enum AnalysisPhase {
     /// * [`TerminatorKind::FalseEdge`]
     /// * [`StatementKind::FakeRead`]
     /// * [`StatementKind::AscribeUserType`]
-    /// * [`StatementKind::Coverage`] with [`CoverageKind::BlockMarker`] or [`CoverageKind::SpanMarker`]
+    /// * [`StatementKind::Coverage`] with [`CoverageKind::BlockMarker`] or
+    ///   [`CoverageKind::SpanMarker`]
     /// * [`Rvalue::Ref`] with `BorrowKind::Fake`
     /// * [`CastKind::PointerCoercion`] with any of the following:
     ///   * [`PointerCoercion::ArrayToPointer`]