Auto merge of #129283 - saethlin:unreachable-allocas, r=scottmcm
Don't alloca for unused locals We already have a concept of mono-unreachable basic blocks; this is primarily useful for ensuring that we do not compile code under an `if false`. But since we never gave locals the same analysis, a large local only used under an `if false` will still have stack space allocated for it. There are 3 places we traverse MIR during monomorphization: Inside the collector, `non_ssa_locals`, and the walk to generate code. Unfortunately, https://github.com/rust-lang/rust/pull/129283#issuecomment-2297925578 indicates that we cannot afford the expense of tracking reachable locals during the collector's traversal, so we do need at least two mono-reachable traversals. And of course caching is of no help here because the benchmarks that regress are incr-unchanged; they don't do any codegen. This fixes the second problem in https://github.com/rust-lang/rust/issues/129282, and brings us anther step toward `const if` at home.
This commit is contained in:
commit
2836482241
9 changed files with 151 additions and 54 deletions
|
@ -15,6 +15,7 @@ use crate::traits::*;
|
|||
|
||||
pub(crate) fn non_ssa_locals<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
|
||||
fx: &FunctionCx<'a, 'tcx, Bx>,
|
||||
traversal_order: &[mir::BasicBlock],
|
||||
) -> BitSet<mir::Local> {
|
||||
let mir = fx.mir;
|
||||
let dominators = mir.basic_blocks.dominators();
|
||||
|
@ -24,13 +25,7 @@ pub(crate) fn non_ssa_locals<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
|
|||
.map(|decl| {
|
||||
let ty = fx.monomorphize(decl.ty);
|
||||
let layout = fx.cx.spanned_layout_of(ty, decl.source_info.span);
|
||||
if layout.is_zst() {
|
||||
LocalKind::ZST
|
||||
} else if fx.cx.is_backend_immediate(layout) || fx.cx.is_backend_scalar_pair(layout) {
|
||||
LocalKind::Unused
|
||||
} else {
|
||||
LocalKind::Memory
|
||||
}
|
||||
if layout.is_zst() { LocalKind::ZST } else { LocalKind::Unused }
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
@ -44,7 +39,8 @@ pub(crate) fn non_ssa_locals<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
|
|||
// If there exists a local definition that dominates all uses of that local,
|
||||
// the definition should be visited first. Traverse blocks in an order that
|
||||
// is a topological sort of dominance partial order.
|
||||
for (bb, data) in traversal::reverse_postorder(mir) {
|
||||
for bb in traversal_order.iter().copied() {
|
||||
let data = &mir.basic_blocks[bb];
|
||||
analyzer.visit_basic_block_data(bb, data);
|
||||
}
|
||||
|
||||
|
@ -77,11 +73,22 @@ struct LocalAnalyzer<'a, 'b, 'tcx, Bx: BuilderMethods<'b, 'tcx>> {
|
|||
|
||||
impl<'a, 'b, 'tcx, Bx: BuilderMethods<'b, 'tcx>> LocalAnalyzer<'a, 'b, 'tcx, Bx> {
|
||||
fn define(&mut self, local: mir::Local, location: DefLocation) {
|
||||
let fx = self.fx;
|
||||
let kind = &mut self.locals[local];
|
||||
let decl = &fx.mir.local_decls[local];
|
||||
match *kind {
|
||||
LocalKind::ZST => {}
|
||||
LocalKind::Memory => {}
|
||||
LocalKind::Unused => *kind = LocalKind::SSA(location),
|
||||
LocalKind::Unused => {
|
||||
let ty = fx.monomorphize(decl.ty);
|
||||
let layout = fx.cx.spanned_layout_of(ty, decl.source_info.span);
|
||||
*kind =
|
||||
if fx.cx.is_backend_immediate(layout) || fx.cx.is_backend_scalar_pair(layout) {
|
||||
LocalKind::SSA(location)
|
||||
} else {
|
||||
LocalKind::Memory
|
||||
};
|
||||
}
|
||||
LocalKind::SSA(_) => *kind = LocalKind::Memory,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -218,7 +218,8 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
|
|||
|
||||
fx.per_local_var_debug_info = fx.compute_per_local_var_debug_info(&mut start_bx);
|
||||
|
||||
let memory_locals = analyze::non_ssa_locals(&fx);
|
||||
let traversal_order = traversal::mono_reachable_reverse_postorder(mir, cx.tcx(), instance);
|
||||
let memory_locals = analyze::non_ssa_locals(&fx, &traversal_order);
|
||||
|
||||
// Allocate variable and temp allocas
|
||||
let local_values = {
|
||||
|
@ -277,17 +278,20 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
|
|||
// So drop the builder of `start_llbb` to avoid having two at the same time.
|
||||
drop(start_bx);
|
||||
|
||||
let reachable_blocks = traversal::mono_reachable_as_bitset(mir, cx.tcx(), instance);
|
||||
let mut unreached_blocks = BitSet::new_filled(mir.basic_blocks.len());
|
||||
// Codegen the body of each reachable block using our reverse postorder list.
|
||||
for bb in traversal_order {
|
||||
fx.codegen_block(bb);
|
||||
unreached_blocks.remove(bb);
|
||||
}
|
||||
|
||||
// Codegen the body of each block using reverse postorder
|
||||
for (bb, _) in traversal::reverse_postorder(mir) {
|
||||
if reachable_blocks.contains(bb) {
|
||||
fx.codegen_block(bb);
|
||||
} else {
|
||||
// We want to skip this block, because it's not reachable. But we still create
|
||||
// the block so terminators in other blocks can reference it.
|
||||
fx.codegen_block_as_unreachable(bb);
|
||||
}
|
||||
// FIXME: These empty unreachable blocks are *mostly* a waste. They are occasionally
|
||||
// targets for a SwitchInt terminator, but the reimplementation of the mono-reachable
|
||||
// simplification in SwitchInt lowering sometimes misses cases that
|
||||
// mono_reachable_reverse_postorder manages to figure out.
|
||||
// The solution is to do something like post-mono GVN. But for now we have this hack.
|
||||
for bb in unreached_blocks.iter() {
|
||||
fx.codegen_block_as_unreachable(bb);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -71,7 +71,7 @@ impl<'tcx> BasicBlocks<'tcx> {
|
|||
#[inline]
|
||||
pub fn reverse_postorder(&self) -> &[BasicBlock] {
|
||||
self.cache.reverse_postorder.get_or_init(|| {
|
||||
let mut rpo: Vec<_> = Postorder::new(&self.basic_blocks, START_BLOCK).collect();
|
||||
let mut rpo: Vec<_> = Postorder::new(&self.basic_blocks, START_BLOCK, ()).collect();
|
||||
rpo.reverse();
|
||||
rpo
|
||||
})
|
||||
|
|
|
@ -1424,6 +1424,19 @@ impl<'tcx> BasicBlockData<'tcx> {
|
|||
pub fn is_empty_unreachable(&self) -> bool {
|
||||
self.statements.is_empty() && matches!(self.terminator().kind, TerminatorKind::Unreachable)
|
||||
}
|
||||
|
||||
/// Like [`Terminator::successors`] but tries to use information available from the [`Instance`]
|
||||
/// to skip successors like the `false` side of an `if const {`.
|
||||
///
|
||||
/// This is used to implement [`traversal::mono_reachable`] and
|
||||
/// [`traversal::mono_reachable_reverse_postorder`].
|
||||
pub fn mono_successors(&self, tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Successors<'_> {
|
||||
if let Some((bits, targets)) = Body::try_const_mono_switchint(tcx, instance, self) {
|
||||
targets.successors_for_value(bits)
|
||||
} else {
|
||||
self.terminator().successors()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -413,6 +413,17 @@ mod helper {
|
|||
use super::*;
|
||||
pub type Successors<'a> = impl DoubleEndedIterator<Item = BasicBlock> + 'a;
|
||||
pub type SuccessorsMut<'a> = impl DoubleEndedIterator<Item = &'a mut BasicBlock> + 'a;
|
||||
|
||||
impl SwitchTargets {
|
||||
/// Like [`SwitchTargets::target_for_value`], but returning the same type as
|
||||
/// [`Terminator::successors`].
|
||||
#[inline]
|
||||
pub fn successors_for_value(&self, value: u128) -> Successors<'_> {
|
||||
let target = self.target_for_value(value);
|
||||
(&[]).into_iter().copied().chain(Some(target))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'tcx> TerminatorKind<'tcx> {
|
||||
#[inline]
|
||||
pub fn successors(&self) -> Successors<'_> {
|
||||
|
|
|
@ -104,36 +104,46 @@ impl<'a, 'tcx> Iterator for Preorder<'a, 'tcx> {
|
|||
/// ```
|
||||
///
|
||||
/// A Postorder traversal of this graph is `D B C A` or `D C B A`
|
||||
pub struct Postorder<'a, 'tcx> {
|
||||
pub struct Postorder<'a, 'tcx, C> {
|
||||
basic_blocks: &'a IndexSlice<BasicBlock, BasicBlockData<'tcx>>,
|
||||
visited: BitSet<BasicBlock>,
|
||||
visit_stack: Vec<(BasicBlock, Successors<'a>)>,
|
||||
root_is_start_block: bool,
|
||||
extra: C,
|
||||
}
|
||||
|
||||
impl<'a, 'tcx> Postorder<'a, 'tcx> {
|
||||
impl<'a, 'tcx, C> Postorder<'a, 'tcx, C>
|
||||
where
|
||||
C: Customization<'tcx>,
|
||||
{
|
||||
pub fn new(
|
||||
basic_blocks: &'a IndexSlice<BasicBlock, BasicBlockData<'tcx>>,
|
||||
root: BasicBlock,
|
||||
) -> Postorder<'a, 'tcx> {
|
||||
extra: C,
|
||||
) -> Postorder<'a, 'tcx, C> {
|
||||
let mut po = Postorder {
|
||||
basic_blocks,
|
||||
visited: BitSet::new_empty(basic_blocks.len()),
|
||||
visit_stack: Vec::new(),
|
||||
root_is_start_block: root == START_BLOCK,
|
||||
extra,
|
||||
};
|
||||
|
||||
let data = &po.basic_blocks[root];
|
||||
|
||||
if let Some(ref term) = data.terminator {
|
||||
po.visited.insert(root);
|
||||
po.visit_stack.push((root, term.successors()));
|
||||
po.traverse_successor();
|
||||
}
|
||||
po.visit(root);
|
||||
po.traverse_successor();
|
||||
|
||||
po
|
||||
}
|
||||
|
||||
fn visit(&mut self, bb: BasicBlock) {
|
||||
if !self.visited.insert(bb) {
|
||||
return;
|
||||
}
|
||||
let data = &self.basic_blocks[bb];
|
||||
let successors = C::successors(data, self.extra);
|
||||
self.visit_stack.push((bb, successors));
|
||||
}
|
||||
|
||||
fn traverse_successor(&mut self) {
|
||||
// This is quite a complex loop due to 1. the borrow checker not liking it much
|
||||
// and 2. what exactly is going on is not clear
|
||||
|
@ -183,16 +193,15 @@ impl<'a, 'tcx> Postorder<'a, 'tcx> {
|
|||
// since we've already visited `E`, that child isn't added to the stack. The last
|
||||
// two iterations yield `B` and finally `A` for a final traversal of [E, D, C, B, A]
|
||||
while let Some(bb) = self.visit_stack.last_mut().and_then(|(_, iter)| iter.next_back()) {
|
||||
if self.visited.insert(bb) {
|
||||
if let Some(term) = &self.basic_blocks[bb].terminator {
|
||||
self.visit_stack.push((bb, term.successors()));
|
||||
}
|
||||
}
|
||||
self.visit(bb);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'tcx> Iterator for Postorder<'_, 'tcx> {
|
||||
impl<'tcx, C> Iterator for Postorder<'_, 'tcx, C>
|
||||
where
|
||||
C: Customization<'tcx>,
|
||||
{
|
||||
type Item = BasicBlock;
|
||||
|
||||
fn next(&mut self) -> Option<BasicBlock> {
|
||||
|
@ -232,6 +241,40 @@ pub fn postorder<'a, 'tcx>(
|
|||
reverse_postorder(body).rev()
|
||||
}
|
||||
|
||||
/// Lets us plug in some additional logic and data into a Postorder traversal. Or not.
|
||||
pub trait Customization<'tcx>: Copy {
|
||||
fn successors<'a>(_: &'a BasicBlockData<'tcx>, _: Self) -> Successors<'a>;
|
||||
}
|
||||
|
||||
impl<'tcx> Customization<'tcx> for () {
|
||||
fn successors<'a>(data: &'a BasicBlockData<'tcx>, _: ()) -> Successors<'a> {
|
||||
data.terminator().successors()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'tcx> Customization<'tcx> for (TyCtxt<'tcx>, Instance<'tcx>) {
|
||||
fn successors<'a>(
|
||||
data: &'a BasicBlockData<'tcx>,
|
||||
(tcx, instance): (TyCtxt<'tcx>, Instance<'tcx>),
|
||||
) -> Successors<'a> {
|
||||
data.mono_successors(tcx, instance)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mono_reachable_reverse_postorder<'a, 'tcx>(
|
||||
body: &'a Body<'tcx>,
|
||||
tcx: TyCtxt<'tcx>,
|
||||
instance: Instance<'tcx>,
|
||||
) -> Vec<BasicBlock> {
|
||||
let mut iter = Postorder::new(&body.basic_blocks, START_BLOCK, (tcx, instance));
|
||||
let mut items = Vec::with_capacity(body.basic_blocks.len());
|
||||
while let Some(block) = iter.next() {
|
||||
items.push(block);
|
||||
}
|
||||
items.reverse();
|
||||
items
|
||||
}
|
||||
|
||||
/// Returns an iterator over all basic blocks reachable from the `START_BLOCK` in no particular
|
||||
/// order.
|
||||
///
|
||||
|
@ -358,14 +401,8 @@ impl<'a, 'tcx> Iterator for MonoReachable<'a, 'tcx> {
|
|||
|
||||
let data = &self.body[idx];
|
||||
|
||||
if let Some((bits, targets)) =
|
||||
Body::try_const_mono_switchint(self.tcx, self.instance, data)
|
||||
{
|
||||
let target = targets.target_for_value(bits);
|
||||
self.add_work([target]);
|
||||
} else {
|
||||
self.add_work(data.terminator().successors());
|
||||
}
|
||||
let targets = data.mono_successors(self.tcx, self.instance);
|
||||
self.add_work(targets);
|
||||
|
||||
return Some((idx, data));
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ pub fn visit_local_usage(locals: &[Local], mir: &Body<'_>, location: Location) -
|
|||
locals.len()
|
||||
];
|
||||
|
||||
traversal::Postorder::new(&mir.basic_blocks, location.block)
|
||||
traversal::Postorder::new(&mir.basic_blocks, location.block, ())
|
||||
.collect::<Vec<_>>()
|
||||
.into_iter()
|
||||
.rev()
|
||||
|
|
|
@ -7,18 +7,19 @@
|
|||
// CHECK-LABEL: @if_bool
|
||||
#[no_mangle]
|
||||
pub fn if_bool() {
|
||||
// CHECK: br label %{{.+}}
|
||||
// CHECK-NOT: br i1
|
||||
// CHECK-NOT: switch
|
||||
_ = if true { 0 } else { 1 };
|
||||
|
||||
// CHECK: br label %{{.+}}
|
||||
_ = if false { 0 } else { 1 };
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @if_constant_int_eq
|
||||
#[no_mangle]
|
||||
pub fn if_constant_int_eq() {
|
||||
// CHECK-NOT: br i1
|
||||
// CHECK-NOT: switch
|
||||
let val = 0;
|
||||
// CHECK: br label %{{.+}}
|
||||
_ = if val == 0 { 0 } else { 1 };
|
||||
|
||||
// CHECK: br label %{{.+}}
|
||||
|
@ -28,23 +29,20 @@ pub fn if_constant_int_eq() {
|
|||
// CHECK-LABEL: @if_constant_match
|
||||
#[no_mangle]
|
||||
pub fn if_constant_match() {
|
||||
// CHECK: br label %{{.+}}
|
||||
// CHECK-NOT: br i1
|
||||
// CHECK-NOT: switch
|
||||
_ = match 1 {
|
||||
1 => 2,
|
||||
2 => 3,
|
||||
_ => 4,
|
||||
};
|
||||
|
||||
// CHECK: br label %{{.+}}
|
||||
_ = match 1 {
|
||||
2 => 3,
|
||||
_ => 4,
|
||||
};
|
||||
|
||||
// CHECK: br label %[[MINUS1:.+]]
|
||||
_ = match -1 {
|
||||
// CHECK: [[MINUS1]]:
|
||||
// CHECK: store i32 1
|
||||
-1 => 1,
|
||||
_ => 0,
|
||||
}
|
||||
|
|
27
tests/codegen/no-alloca-inside-if-false.rs
Normal file
27
tests/codegen/no-alloca-inside-if-false.rs
Normal file
|
@ -0,0 +1,27 @@
|
|||
//@ compile-flags: -Cno-prepopulate-passes -Copt-level=0 -Cpanic=abort
|
||||
// Check that there's an alloca for the reference and the vector, but nothing else.
|
||||
// We use panic=abort because unwinding panics give hint::black_box a cleanup block, which has
|
||||
// another alloca.
|
||||
|
||||
#![crate_type = "lib"]
|
||||
|
||||
#[inline(never)]
|
||||
fn test<const SIZE: usize>() {
|
||||
// CHECK-LABEL: no_alloca_inside_if_false::test
|
||||
// CHECK: start:
|
||||
// CHECK-NEXT: alloca [{{12|24}} x i8]
|
||||
// CHECK-NOT: alloca
|
||||
if const { SIZE < 4096 } {
|
||||
let arr = [0u8; SIZE];
|
||||
std::hint::black_box(&arr);
|
||||
} else {
|
||||
let vec = vec![0u8; SIZE];
|
||||
std::hint::black_box(&vec);
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @main
|
||||
#[no_mangle]
|
||||
pub fn main() {
|
||||
test::<8192>();
|
||||
}
|
Loading…
Add table
Reference in a new issue