rustc_mir_dataflow/framework/
mod.rs

//! A framework that can express both [gen-kill] and generic dataflow problems.
//!
//! To use this framework, implement the [`Analysis`] trait. There used to be a `GenKillAnalysis`
//! alternative trait for gen-kill analyses that would pre-compute the transfer function for each
//! block. It was intended as an optimization, but it ended up not being any faster than
//! `Analysis`.
//!
//! The `impls` module contains several examples of dataflow analyses.
//!
//! Once you have an `Analysis`, call `iterate_to_fixpoint` on it to get a `Results`. From
//! there, you can use a `ResultsCursor` to inspect the fixpoint solution to your dataflow problem
//! (good for inspecting a small number of locations), or implement the `ResultsVisitor` interface
//! and use `visit_results` (good for inspecting many or all locations). The following example uses
//! the `ResultsCursor` approach.
//!
//! ```ignore (cross-crate-imports)
//! use rustc_mir_dataflow::Analysis; // Makes `iterate_to_fixpoint` available.
//!
//! fn do_my_analysis(tcx: TyCtxt<'tcx>, body: &mir::Body<'tcx>) {
//!     let mut cursor = MyAnalysis::new()
//!         .iterate_to_fixpoint(tcx, body, None)
//!         .into_results_cursor(body);
//!
//!     // Print the dataflow state *after* each statement in the start block.
//!     for statement_index in 0..body.basic_blocks[START_BLOCK].statements.len() {
//!         cursor.seek_after(Location { block: START_BLOCK, statement_index });
//!         let state = cursor.get();
//!         println!("{:?}", state);
//!     }
//! }
//! ```
//!
//! [gen-kill]: https://en.wikipedia.org/wiki/Data-flow_analysis#Bit_vector_problems
34
35use std::cmp::Ordering;
36
37use rustc_data_structures::work_queue::WorkQueue;
38use rustc_index::bit_set::{DenseBitSet, MixedBitSet};
39use rustc_index::{Idx, IndexVec};
40use rustc_middle::bug;
41use rustc_middle::mir::{self, BasicBlock, CallReturnPlaces, Location, TerminatorEdges, traversal};
42use rustc_middle::ty::TyCtxt;
43use tracing::error;
44
45use self::graphviz::write_graphviz_results;
46use super::fmt::DebugWithContext;
47
48mod cursor;
49mod direction;
50pub mod fmt;
51pub mod graphviz;
52pub mod lattice;
53mod results;
54mod visitor;
55
56pub use self::cursor::ResultsCursor;
57pub use self::direction::{Backward, Direction, Forward};
58pub use self::lattice::{JoinSemiLattice, MaybeReachable};
59pub use self::results::{EntryStates, Results};
60pub use self::visitor::{ResultsVisitor, visit_results};
61
/// Analysis domains are all bitsets of various kinds. This trait holds
/// operations needed by all of them.
pub trait BitSetExt<T> {
    /// Reports whether `elem` is a member of the set.
    fn contains(&self, elem: T) -> bool;
}
67
68impl<T: Idx> BitSetExt<T> for DenseBitSet<T> {
69    fn contains(&self, elem: T) -> bool {
70        self.contains(elem)
71    }
72}
73
74impl<T: Idx> BitSetExt<T> for MixedBitSet<T> {
75    fn contains(&self, elem: T) -> bool {
76        self.contains(elem)
77    }
78}
79
/// A dataflow problem with an arbitrarily complex transfer function.
///
/// This trait specifies the lattice on which this analysis operates (the domain), its
/// initial value at the entry point of each basic block, and various operations.
///
/// # Convergence
///
/// When implementing this trait it's possible to choose a transfer function such that the analysis
/// does not reach fixpoint. To guarantee convergence, your transfer functions must maintain the
/// following invariant:
///
/// > If the dataflow state **before** some point in the program changes to be greater
/// > than the prior state **before** that point, the dataflow state **after** that point must
/// > also change to be greater than the prior state **after** that point.
///
/// This invariant guarantees that the dataflow state at a given point in the program increases
/// monotonically until fixpoint is reached. Note that this monotonicity requirement only applies
/// to the same point in the program at different points in time. The dataflow state at a given
/// point in the program may or may not be greater than the state at any preceding point.
pub trait Analysis<'tcx> {
    /// The type that holds the dataflow state at any given point in the program.
    type Domain: Clone + JoinSemiLattice;

    /// The direction of this analysis. Either `Forward` or `Backward`.
    type Direction: Direction = Forward;

    /// Auxiliary data used for analyzing `SwitchInt` terminators, if necessary.
    type SwitchIntData = !;

    /// A descriptive name for this analysis. Used only for debugging.
    ///
    /// This name should be brief and contain no spaces, periods or other characters that are not
    /// suitable as part of a filename.
    const NAME: &'static str;

    /// Returns the initial value of the dataflow state upon entry to each basic block.
    fn bottom_value(&self, body: &mir::Body<'tcx>) -> Self::Domain;

    /// Mutates the initial value of the dataflow state upon entry to the `START_BLOCK`.
    ///
    /// For backward analyses, initial state (besides the bottom value) is not yet supported. Trying
    /// to mutate the initial state will result in a panic.
    //
    // FIXME: For backward dataflow analyses, the initial state should be applied to every basic
    // block where control flow could exit the MIR body (e.g., those terminated with `return` or
    // `resume`). It's not obvious how to handle `yield` points in coroutines, however.
    fn initialize_start_block(&self, body: &mir::Body<'tcx>, state: &mut Self::Domain);

    /// Updates the current dataflow state with an "early" effect, i.e. one
    /// that occurs immediately before the given statement.
    ///
    /// This method is useful if the consumer of the results of this analysis only needs to observe
    /// *part* of the effect of a statement (e.g. for two-phase borrows). As a general rule,
    /// analyses should not implement this without also implementing
    /// `apply_primary_statement_effect`.
    ///
    /// The default implementation leaves the state untouched.
    fn apply_early_statement_effect(
        &mut self,
        _state: &mut Self::Domain,
        _statement: &mir::Statement<'tcx>,
        _location: Location,
    ) {
    }

    /// Updates the current dataflow state with the effect of evaluating a statement.
    fn apply_primary_statement_effect(
        &mut self,
        state: &mut Self::Domain,
        statement: &mir::Statement<'tcx>,
        location: Location,
    );

    /// Updates the current dataflow state with an effect that occurs immediately *before* the
    /// given terminator.
    ///
    /// This method is useful if the consumer of the results of this analysis needs only to observe
    /// *part* of the effect of a terminator (e.g. for two-phase borrows). As a general rule,
    /// analyses should not implement this without also implementing
    /// `apply_primary_terminator_effect`.
    ///
    /// The default implementation leaves the state untouched.
    fn apply_early_terminator_effect(
        &mut self,
        _state: &mut Self::Domain,
        _terminator: &mir::Terminator<'tcx>,
        _location: Location,
    ) {
    }

    /// Updates the current dataflow state with the effect of evaluating a terminator.
    ///
    /// The effect of a successful return from a `Call` terminator should **not** be accounted for
    /// in this function. That should go in `apply_call_return_effect`. For example, in the
    /// `InitializedPlaces` analyses, the return place for a function call is not marked as
    /// initialized here.
    ///
    /// The default implementation leaves the state untouched and returns `terminator.edges()`.
    fn apply_primary_terminator_effect<'mir>(
        &mut self,
        _state: &mut Self::Domain,
        terminator: &'mir mir::Terminator<'tcx>,
        _location: Location,
    ) -> TerminatorEdges<'mir, 'tcx> {
        terminator.edges()
    }

    /* Edge-specific effects */

    /// Updates the current dataflow state with the effect of a successful return from a `Call`
    /// terminator.
    ///
    /// This is separate from `apply_primary_terminator_effect` to properly track state across
    /// unwind edges.
    ///
    /// The default implementation leaves the state untouched.
    fn apply_call_return_effect(
        &mut self,
        _state: &mut Self::Domain,
        _block: BasicBlock,
        _return_places: CallReturnPlaces<'_, 'tcx>,
    ) {
    }

    /// Used to update the current dataflow state with the effect of taking a particular branch in
    /// a `SwitchInt` terminator.
    ///
    /// Unlike the other edge-specific effects, which are allowed to mutate `Self::Domain`
    /// directly, overriders of this method must return a `Self::SwitchIntData` value (wrapped in
    /// `Some`). The `apply_switch_int_edge_effect` method will then be called once for each
    /// outgoing edge and will have access to the dataflow state that will be propagated along that
    /// edge, and also the `Self::SwitchIntData` value.
    ///
    /// This interface is somewhat more complex than the other visitor-like "effect" methods.
    /// However, it is both more ergonomic—callers don't need to recompute or cache information
    /// about a given `SwitchInt` terminator for each one of its edges—and more efficient—the
    /// engine doesn't need to clone the exit state for a block unless
    /// `get_switch_int_data` is actually called.
    ///
    /// The default implementation returns `None`.
    fn get_switch_int_data(
        &mut self,
        _block: mir::BasicBlock,
        _discr: &mir::Operand<'tcx>,
    ) -> Option<Self::SwitchIntData> {
        None
    }

    /// See comments on `get_switch_int_data`.
    ///
    /// The default implementation is `unreachable!()`, so an analysis that overrides
    /// `get_switch_int_data` to return `Some` must override this method as well.
    // NOTE(review): presumably the engine never calls this when `get_switch_int_data` returned
    // `None` — confirm against the `Direction` implementations.
    fn apply_switch_int_edge_effect(
        &mut self,
        _data: &mut Self::SwitchIntData,
        _state: &mut Self::Domain,
        _edge: SwitchIntTarget,
    ) {
        unreachable!();
    }

    /* Extension methods */

    /// Finds the fixpoint for this dataflow problem.
    ///
    /// You shouldn't need to override this. Its purpose is to enable method chaining like so:
    ///
    /// ```ignore (cross-crate-imports)
    /// let results = MyAnalysis::new(tcx, body)
    ///     .iterate_to_fixpoint(tcx, body, None)
    ///     .into_results_cursor(body);
    /// ```
    /// You can optionally add a `pass_name` to the graphviz output for this particular run of a
    /// dataflow analysis. Some analyses are run multiple times in the compilation pipeline.
    /// Without a `pass_name` to differentiate them, only the results for the latest run will be
    /// saved.
    ///
    /// # Panics
    ///
    /// Raises a compiler bug (`bug!`) if this is a backward analysis and
    /// `initialize_start_block` changed the start block's state away from the bottom value.
    fn iterate_to_fixpoint<'mir>(
        mut self,
        tcx: TyCtxt<'tcx>,
        body: &'mir mir::Body<'tcx>,
        pass_name: Option<&'static str>,
    ) -> Results<'tcx, Self>
    where
        Self: Sized,
        Self::Domain: DebugWithContext<Self>,
    {
        // Every block's entry state starts out as the bottom value; only the start block gets a
        // chance to be customized by the analysis.
        let mut entry_states =
            IndexVec::from_fn_n(|_| self.bottom_value(body), body.basic_blocks.len());
        self.initialize_start_block(body, &mut entry_states[mir::START_BLOCK]);

        if Self::Direction::IS_BACKWARD && entry_states[mir::START_BLOCK] != self.bottom_value(body)
        {
            bug!("`initialize_start_block` is not yet supported for backward dataflow analyses");
        }

        let mut dirty_queue: WorkQueue<BasicBlock> = WorkQueue::with_none(body.basic_blocks.len());

        // Seed the worklist with the blocks yielded by the traversal, in an order chosen to suit
        // the direction of the analysis.
        if Self::Direction::IS_FORWARD {
            for (bb, _) in traversal::reverse_postorder(body) {
                dirty_queue.insert(bb);
            }
        } else {
            // Reverse post-order on the reverse CFG may generate a better iteration order for
            // backward dataflow analyses, but probably not enough to matter.
            for (bb, _) in traversal::postorder(body) {
                dirty_queue.insert(bb);
            }
        }

        // `state` is not actually used between iterations;
        // this is just an optimization to avoid reallocating
        // every iteration.
        let mut state = self.bottom_value(body);
        while let Some(bb) = dirty_queue.pop() {
            // Set the state to the entry state of the block.
            // This is equivalent to `state = entry_states[bb].clone()`,
            // but it saves an allocation, thus improving compile times.
            state.clone_from(&entry_states[bb]);

            Self::Direction::apply_effects_in_block(
                &mut self,
                body,
                &mut state,
                bb,
                &body[bb],
                |target: BasicBlock, state: &Self::Domain| {
                    // Merge the propagated state into the target's entry state. The target only
                    // needs to be (re)processed if that merge actually changed something.
                    let set_changed = entry_states[target].join(state);
                    if set_changed {
                        dirty_queue.insert(target);
                    }
                },
            );
        }

        let mut results = Results { analysis: self, entry_states };

        // The graphviz dump is a debugging aid: a failure to write it is logged and otherwise
        // ignored, so the computed results are still returned.
        if tcx.sess.opts.unstable_opts.dump_mir_dataflow {
            let res = write_graphviz_results(tcx, body, &mut results, pass_name);
            if let Err(e) = res {
                error!("Failed to write graphviz dataflow results: {}", e);
            }
        }

        results
    }
}
313
/// The mutations a transfer function is allowed to perform in a gen/kill problem.
pub trait GenKill<T> {
    /// Adds `elem` to the state vector.
    fn gen_(&mut self, elem: T);

    /// Deletes `elem` from the state vector.
    fn kill(&mut self, elem: T);

    /// Invokes `gen_` once for every item yielded by `elems`, in iteration order.
    fn gen_all(&mut self, elems: impl IntoIterator<Item = T>) {
        elems.into_iter().for_each(|elem| self.gen_(elem));
    }

    /// Invokes `kill` once for every item yielded by `elems`, in iteration order.
    fn kill_all(&mut self, elems: impl IntoIterator<Item = T>) {
        elems.into_iter().for_each(|elem| self.kill(elem));
    }
}
336
337impl<T: Idx> GenKill<T> for DenseBitSet<T> {
338    fn gen_(&mut self, elem: T) {
339        self.insert(elem);
340    }
341
342    fn kill(&mut self, elem: T) {
343        self.remove(elem);
344    }
345}
346
347impl<T: Idx> GenKill<T> for MixedBitSet<T> {
348    fn gen_(&mut self, elem: T) {
349        self.insert(elem);
350    }
351
352    fn kill(&mut self, elem: T) {
353        self.remove(elem);
354    }
355}
356
357impl<T, S: GenKill<T>> GenKill<T> for MaybeReachable<S> {
358    fn gen_(&mut self, elem: T) {
359        match self {
360            // If the state is not reachable, adding an element does nothing.
361            MaybeReachable::Unreachable => {}
362            MaybeReachable::Reachable(set) => set.gen_(elem),
363        }
364    }
365
366    fn kill(&mut self, elem: T) {
367        match self {
368            // If the state is not reachable, killing an element does nothing.
369            MaybeReachable::Unreachable => {}
370            MaybeReachable::Reachable(set) => set.kill(elem),
371        }
372    }
373}
374
// NOTE: DO NOT CHANGE VARIANT ORDER. The derived `Ord` impls rely on the current order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Effect {
    /// The "early" effect (e.g., `apply_early_statement_effect`) for a statement/terminator.
    Early,

    /// The "primary" effect (e.g., `apply_primary_statement_effect`) for a statement/terminator.
    Primary,
}

impl Effect {
    /// Pairs this effect with a statement index to form an `EffectIndex`.
    const fn at_index(self, statement_index: usize) -> EffectIndex {
        EffectIndex { statement_index, effect: self }
    }
}

/// Identifies one effect (early or primary) of the statement/terminator at a given index.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct EffectIndex {
    statement_index: usize,
    effect: Effect,
}

impl EffectIndex {
    /// Steps to the effect applied right after `self` when walking a block forwards: the primary
    /// effect of the same statement, or the early effect of the following statement.
    fn next_in_forward_order(self) -> Self {
        let (effect, statement_index) = match self.effect {
            Effect::Early => (Effect::Primary, self.statement_index),
            Effect::Primary => (Effect::Early, self.statement_index + 1),
        };
        Self { statement_index, effect }
    }

    /// Steps to the effect applied right after `self` when walking a block backwards: the primary
    /// effect of the same statement, or the early effect of the preceding statement.
    fn next_in_backward_order(self) -> Self {
        let (effect, statement_index) = match self.effect {
            Effect::Early => (Effect::Primary, self.statement_index),
            Effect::Primary => (Effect::Early, self.statement_index - 1),
        };
        Self { statement_index, effect }
    }

    /// Returns `true` if the effect at `self` should be applied earlier than the effect at `other`
    /// in forward order.
    fn precedes_in_forward_order(self, other: Self) -> bool {
        // Lower statement indices come first; ties are broken by `Effect`'s derived order.
        let by_statement = self.statement_index.cmp(&other.statement_index);
        let by_effect = self.effect.cmp(&other.effect);
        matches!(by_statement.then(by_effect), Ordering::Less)
    }

    /// Returns `true` if the effect at `self` should be applied earlier than the effect at `other`
    /// in backward order.
    fn precedes_in_backward_order(self, other: Self) -> bool {
        // Higher statement indices come first, but within a single statement the effects keep
        // their derived order (early before primary).
        let by_statement = self.statement_index.cmp(&other.statement_index).reverse();
        let by_effect = self.effect.cmp(&other.effect);
        matches!(by_statement.then(by_effect), Ordering::Less)
    }
}
432
/// Describes one outgoing edge of a `SwitchInt` terminator, as passed to
/// `Analysis::apply_switch_int_edge_effect`.
pub struct SwitchIntTarget {
    /// The value associated with this edge, if there is one.
    // NOTE(review): presumably `None` marks the fallback ("otherwise") edge — confirm against
    // the code that constructs these values.
    pub value: Option<u128>,
    /// The basic block this edge leads to.
    pub target: BasicBlock,
}
437
438#[cfg(test)]
439mod tests;