Rollup merge of #119077 - tmiasko:lint, r=cjgillot

Separate MIR lints from validation

Add a MIR lint pass, enabled with -Zlint-mir, which identifies undefined or
likely erroneous behaviour.

The initial implementation mostly migrates existing checks of this nature from
MIR validator, where they did not belong (those checks have false positives and
there is nothing inherently invalid about MIR with undefined behaviour).

Fixes #104736
Fixes #104843
Fixes #116079
Fixes #116736
Fixes #118990
This commit is contained in:
Michael Goulet 2023-12-22 21:41:03 -05:00 committed by GitHub
commit 7dd095598b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 196 additions and 56 deletions

View file

@ -8,9 +8,6 @@ use rustc_middle::mir::interpret::Scalar;
use rustc_middle::mir::visit::{NonUseContext, PlaceContext, Visitor};
use rustc_middle::mir::*;
use rustc_middle::ty::{self, InstanceDef, ParamEnv, Ty, TyCtxt, TypeVisitableExt, Variance};
use rustc_mir_dataflow::impls::MaybeStorageLive;
use rustc_mir_dataflow::storage::always_storage_live_locals;
use rustc_mir_dataflow::{Analysis, ResultsCursor};
use rustc_target::abi::{Size, FIRST_VARIANT};
use rustc_target::spec::abi::Abi;
@ -51,12 +48,6 @@ impl<'tcx> MirPass<'tcx> for Validator {
Reveal::All => tcx.param_env_reveal_all_normalized(def_id),
};
let always_live_locals = always_storage_live_locals(body);
let storage_liveness = MaybeStorageLive::new(std::borrow::Cow::Owned(always_live_locals))
.into_engine(tcx, body)
.iterate_to_fixpoint()
.into_results_cursor(body);
let can_unwind = if mir_phase <= MirPhase::Runtime(RuntimePhase::Initial) {
// In this case `AbortUnwindingCalls` haven't yet been executed.
true
@ -83,7 +74,6 @@ impl<'tcx> MirPass<'tcx> for Validator {
mir_phase,
unwind_edge_count: 0,
reachable_blocks: traversal::reachable_as_bitset(body),
storage_liveness,
place_cache: FxHashSet::default(),
value_cache: FxHashSet::default(),
can_unwind,
@ -116,7 +106,6 @@ struct CfgChecker<'a, 'tcx> {
mir_phase: MirPhase,
unwind_edge_count: usize,
reachable_blocks: BitSet<BasicBlock>,
storage_liveness: ResultsCursor<'a, 'tcx, MaybeStorageLive<'static>>,
place_cache: FxHashSet<PlaceRef<'tcx>>,
value_cache: FxHashSet<u128>,
// If `false`, then the MIR must not contain `UnwindAction::Continue` or
@ -294,28 +283,13 @@ impl<'a, 'tcx> CfgChecker<'a, 'tcx> {
}
impl<'a, 'tcx> Visitor<'tcx> for CfgChecker<'a, 'tcx> {
fn visit_local(&mut self, local: Local, context: PlaceContext, location: Location) {
fn visit_local(&mut self, local: Local, _context: PlaceContext, location: Location) {
if self.body.local_decls.get(local).is_none() {
self.fail(
location,
format!("local {local:?} has no corresponding declaration in `body.local_decls`"),
);
}
if self.reachable_blocks.contains(location.block) && context.is_use() {
// We check that the local is live whenever it is used. Technically, violating this
// restriction is only UB and not actually indicative of not well-formed MIR. This means
// that an optimization which turns MIR that already has UB into MIR that fails this
// check is not necessarily wrong. However, we have no such optimizations at the moment,
// and so we include this check anyway to help us catch bugs. If you happen to write an
// optimization that might cause this to incorrectly fire, feel free to remove this
// check.
self.storage_liveness.seek_after_primary_effect(location);
let locals_with_storage = self.storage_liveness.get();
if !locals_with_storage.contains(local) {
self.fail(location, format!("use of local {local:?}, which has no storage here"));
}
}
}
fn visit_statement(&mut self, statement: &Statement<'tcx>, location: Location) {
@ -367,26 +341,8 @@ impl<'a, 'tcx> Visitor<'tcx> for CfgChecker<'a, 'tcx> {
self.fail(location, format!("explicit `{kind:?}` is forbidden"));
}
}
StatementKind::StorageLive(local) => {
// We check that the local is not live when entering a `StorageLive` for it.
// Technically, violating this restriction is only UB and not actually indicative
// of not well-formed MIR. This means that an optimization which turns MIR that
// already has UB into MIR that fails this check is not necessarily wrong. However,
// we have no such optimizations at the moment, and so we include this check anyway
// to help us catch bugs. If you happen to write an optimization that might cause
// this to incorrectly fire, feel free to remove this check.
if self.reachable_blocks.contains(location.block) {
self.storage_liveness.seek_before_primary_effect(location);
let locals_with_storage = self.storage_liveness.get();
if locals_with_storage.contains(*local) {
self.fail(
location,
format!("StorageLive({local:?}) which already has storage here"),
);
}
}
}
StatementKind::StorageDead(_)
StatementKind::StorageLive(_)
| StatementKind::StorageDead(_)
| StatementKind::Intrinsic(_)
| StatementKind::Coverage(_)
| StatementKind::ConstEvalCounter

View file

@ -81,17 +81,17 @@ impl<'tcx, 'a> crate::GenKillAnalysis<'tcx> for MaybeStorageLive<'a> {
}
#[derive(Clone)]
pub struct MaybeStorageDead {
always_live_locals: BitSet<Local>,
pub struct MaybeStorageDead<'a> {
always_live_locals: Cow<'a, BitSet<Local>>,
}
impl MaybeStorageDead {
pub fn new(always_live_locals: BitSet<Local>) -> Self {
impl<'a> MaybeStorageDead<'a> {
pub fn new(always_live_locals: Cow<'a, BitSet<Local>>) -> Self {
MaybeStorageDead { always_live_locals }
}
}
impl<'tcx> crate::AnalysisDomain<'tcx> for MaybeStorageDead {
impl<'tcx, 'a> crate::AnalysisDomain<'tcx> for MaybeStorageDead<'a> {
type Domain = BitSet<Local>;
const NAME: &'static str = "maybe_storage_dead";
@ -112,7 +112,7 @@ impl<'tcx> crate::AnalysisDomain<'tcx> for MaybeStorageDead {
}
}
impl<'tcx> crate::GenKillAnalysis<'tcx> for MaybeStorageDead {
impl<'tcx, 'a> crate::GenKillAnalysis<'tcx> for MaybeStorageDead<'a> {
type Idx = Local;
fn domain_size(&self, body: &Body<'tcx>) -> usize {

View file

@ -86,6 +86,7 @@ pub mod inline;
mod instsimplify;
mod jump_threading;
mod large_enums;
mod lint;
mod lower_intrinsics;
mod lower_slice_len;
mod match_branches;

View file

@ -0,0 +1,119 @@
//! This pass statically detects code which has undefined behaviour or is likely to be erroneous.
//! It can be used to locate problems in MIR building or optimizations. It assumes that all code
//! can be executed, so it has false positives.
use rustc_index::bit_set::BitSet;
use rustc_middle::mir::visit::{PlaceContext, Visitor};
use rustc_middle::mir::*;
use rustc_middle::ty::TyCtxt;
use rustc_mir_dataflow::impls::{MaybeStorageDead, MaybeStorageLive};
use rustc_mir_dataflow::storage::always_storage_live_locals;
use rustc_mir_dataflow::{Analysis, ResultsCursor};
use std::borrow::Cow;
pub fn lint_body<'tcx>(tcx: TyCtxt<'tcx>, body: &Body<'tcx>, when: String) {
let reachable_blocks = traversal::reachable_as_bitset(body);
let always_live_locals = &always_storage_live_locals(body);
let maybe_storage_live = MaybeStorageLive::new(Cow::Borrowed(always_live_locals))
.into_engine(tcx, body)
.iterate_to_fixpoint()
.into_results_cursor(body);
let maybe_storage_dead = MaybeStorageDead::new(Cow::Borrowed(always_live_locals))
.into_engine(tcx, body)
.iterate_to_fixpoint()
.into_results_cursor(body);
Lint {
tcx,
when,
body,
is_fn_like: tcx.def_kind(body.source.def_id()).is_fn_like(),
always_live_locals,
reachable_blocks,
maybe_storage_live,
maybe_storage_dead,
}
.visit_body(body);
}
struct Lint<'a, 'tcx> {
tcx: TyCtxt<'tcx>,
when: String,
body: &'a Body<'tcx>,
is_fn_like: bool,
always_live_locals: &'a BitSet<Local>,
reachable_blocks: BitSet<BasicBlock>,
maybe_storage_live: ResultsCursor<'a, 'tcx, MaybeStorageLive<'a>>,
maybe_storage_dead: ResultsCursor<'a, 'tcx, MaybeStorageDead<'a>>,
}
impl<'a, 'tcx> Lint<'a, 'tcx> {
#[track_caller]
fn fail(&self, location: Location, msg: impl AsRef<str>) {
let span = self.body.source_info(location).span;
self.tcx.sess.dcx().span_delayed_bug(
span,
format!(
"broken MIR in {:?} ({}) at {:?}:\n{}",
self.body.source.instance,
self.when,
location,
msg.as_ref()
),
);
}
}
impl<'a, 'tcx> Visitor<'tcx> for Lint<'a, 'tcx> {
fn visit_local(&mut self, local: Local, context: PlaceContext, location: Location) {
if self.reachable_blocks.contains(location.block) && context.is_use() {
self.maybe_storage_dead.seek_after_primary_effect(location);
if self.maybe_storage_dead.get().contains(local) {
self.fail(location, format!("use of local {local:?}, which has no storage here"));
}
}
}
fn visit_statement(&mut self, statement: &Statement<'tcx>, location: Location) {
match statement.kind {
StatementKind::StorageLive(local) => {
if self.reachable_blocks.contains(location.block) {
self.maybe_storage_live.seek_before_primary_effect(location);
if self.maybe_storage_live.get().contains(local) {
self.fail(
location,
format!("StorageLive({local:?}) which already has storage here"),
);
}
}
}
_ => {}
}
self.super_statement(statement, location);
}
fn visit_terminator(&mut self, terminator: &Terminator<'tcx>, location: Location) {
match terminator.kind {
TerminatorKind::Return => {
if self.is_fn_like && self.reachable_blocks.contains(location.block) {
self.maybe_storage_live.seek_after_primary_effect(location);
for local in self.maybe_storage_live.get().iter() {
if !self.always_live_locals.contains(local) {
self.fail(
location,
format!(
"local {local:?} still has storage when returning from function"
),
);
}
}
}
}
_ => {}
}
self.super_terminator(terminator, location);
}
}

View file

@ -2,7 +2,7 @@ use rustc_middle::mir::{self, Body, MirPhase, RuntimePhase};
use rustc_middle::ty::TyCtxt;
use rustc_session::Session;
use crate::{validate, MirPass};
use crate::{lint::lint_body, validate, MirPass};
/// Just like `MirPass`, except it cannot mutate `Body`.
pub trait MirLint<'tcx> {
@ -109,6 +109,7 @@ fn run_passes_inner<'tcx>(
phase_change: Option<MirPhase>,
validate_each: bool,
) {
let lint = tcx.sess.opts.unstable_opts.lint_mir & !body.should_skip();
let validate = validate_each & tcx.sess.opts.unstable_opts.validate_mir & !body.should_skip();
let overridden_passes = &tcx.sess.opts.unstable_opts.mir_enable_passes;
trace!(?overridden_passes);
@ -131,6 +132,9 @@ fn run_passes_inner<'tcx>(
if validate {
validate_body(tcx, body, format!("before pass {name}"));
}
if lint {
lint_body(tcx, body, format!("before pass {name}"));
}
if let Some(prof_arg) = &prof_arg {
tcx.sess
@ -147,6 +151,9 @@ fn run_passes_inner<'tcx>(
if validate {
validate_body(tcx, body, format!("after pass {name}"));
}
if lint {
lint_body(tcx, body, format!("after pass {name}"));
}
body.pass_count += 1;
}
@ -164,6 +171,9 @@ fn run_passes_inner<'tcx>(
if validate || new_phase == MirPhase::Runtime(RuntimePhase::Optimized) {
validate_body(tcx, body, format!("after phase change to {}", new_phase.name()));
}
if lint {
lint_body(tcx, body, format!("after phase change to {}", new_phase.name()));
}
body.pass_count = 1;
}

View file

@ -7,6 +7,7 @@ use rustc_middle::ty::TyCtxt;
use rustc_mir_dataflow::impls::MaybeStorageDead;
use rustc_mir_dataflow::storage::always_storage_live_locals;
use rustc_mir_dataflow::Analysis;
use std::borrow::Cow;
use crate::ssa::{SsaLocals, StorageLiveLocals};
@ -120,7 +121,7 @@ fn compute_replacement<'tcx>(
// Compute `MaybeStorageDead` dataflow to check that we only replace when the pointee is
// definitely live.
let mut maybe_dead = MaybeStorageDead::new(always_live_locals)
let mut maybe_dead = MaybeStorageDead::new(Cow::Owned(always_live_locals))
.into_engine(tcx, body)
.iterate_to_fixpoint()
.into_results_cursor(body);

View file

@ -1694,6 +1694,8 @@ options! {
"link native libraries in the linker invocation (default: yes)"),
link_only: bool = (false, parse_bool, [TRACKED],
"link the `.rlink` file generated by `-Z no-link` (default: no)"),
lint_mir: bool = (false, parse_bool, [UNTRACKED],
"lint MIR before and after each transformation"),
llvm_module_flag: Vec<(String, u32, String)> = (Vec::new(), parse_llvm_module_flag, [TRACKED],
"a list of module flags to pass to LLVM (space separated)"),
llvm_plugins: Vec<String> = (Vec::new(), parse_list, [TRACKED],

View file

@ -2445,6 +2445,7 @@ impl<'test> TestCx<'test> {
"-Copt-level=1",
&zdump_arg,
"-Zvalidate-mir",
"-Zlint-mir",
"-Zdump-mir-exclude-pass-number",
]);
if let Some(pass) = &self.props.mir_unit_test {

View file

@ -1,3 +1,4 @@
// compile-flags: -Zlint-mir=no
// unit-test: ReferencePropagation
// needs-unwind

View file

@ -0,0 +1,30 @@
// compile-flags: -Zlint-mir --crate-type=lib -Ztreat-err-as-bug
// failure-status: 101
// dont-check-compiler-stderr
// regex-error-pattern: use of local .*, which has no storage here
#![feature(custom_mir, core_intrinsics)]
extern crate core;
use core::intrinsics::mir::*;
#[custom_mir(dialect = "built")]
pub fn f(a: bool) {
mir!(
let b: ();
{
match a { true => bb1, _ => bb2 }
}
bb1 = {
StorageLive(b);
Goto(bb3)
}
bb2 = {
Goto(bb3)
}
bb3 = {
b = ();
RET = b;
StorageDead(b);
Return()
}
)
}

View file

@ -1,4 +1,4 @@
// compile-flags: -Zvalidate-mir -Ztreat-err-as-bug
// compile-flags: -Zlint-mir -Ztreat-err-as-bug
// failure-status: 101
// error-pattern: broken MIR in
// error-pattern: StorageLive(_1) which already has storage here

View file

@ -0,0 +1,19 @@
// compile-flags: -Zlint-mir -Ztreat-err-as-bug
// failure-status: 101
// dont-check-compiler-stderr
// error-pattern: has storage when returning
#![feature(custom_mir, core_intrinsics)]
extern crate core;
use core::intrinsics::mir::*;
#[custom_mir(dialect = "built")]
fn main() {
mir!(
let a: ();
{
StorageLive(a);
RET = a;
Return()
}
)
}