diff --git a/Cargo.lock b/Cargo.lock index 5adfd6c34940..045e1268677e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -809,6 +809,7 @@ name = "cranelift-entity" version = "0.128.0" dependencies = [ "cranelift-bitset", + "im-rc", "serde", "serde_derive", ] diff --git a/cranelift/codegen/src/egraph.rs b/cranelift/codegen/src/egraph.rs index 9993f7941f10..93961bc7876a 100644 --- a/cranelift/codegen/src/egraph.rs +++ b/cranelift/codegen/src/egraph.rs @@ -603,7 +603,7 @@ where // The initial best choice is "no simplification, just use the original // instruction" which has the original instruction's cost. let mut best = None; - let mut best_cost = cost::Cost::of_skeleton_op( + let mut best_cost = cost::ScalarCost::of_skeleton_op( ctx.func.dfg.insts[inst].opcode(), ctx.func.dfg.inst_args(inst).len(), ); @@ -682,7 +682,7 @@ where // Our best simplification is the one with the least cost. Update // `best` if necessary. - let cost = cost::Cost::of_skeleton_op( + let cost = cost::ScalarCost::of_skeleton_op( ctx.func.dfg.insts[new_inst].opcode(), ctx.func.dfg.inst_args(new_inst).len(), ); diff --git a/cranelift/codegen/src/egraph/cost.rs b/cranelift/codegen/src/egraph/cost.rs index 1ff56fcd61f9..abc8e6fc5854 100644 --- a/cranelift/codegen/src/egraph/cost.rs +++ b/cranelift/codegen/src/egraph/cost.rs @@ -1,6 +1,97 @@ //! Cost functions for egraph representation. -use crate::ir::Opcode; +use crate::ir::{DataFlowGraph, Inst, Opcode}; +use cranelift_entity::ImmutableEntitySet; + +/// The compound cost of an expression. +/// +/// Tracks the set of instructions that make up this expression and sums their +/// costs, avoiding "double counting" the costs of values that were defined by +/// the same instruction and values that appear multiple times within the +/// expression (i.e. the expression is a DAG and not a tree). +#[derive(Clone, Debug)] +pub(crate) struct ExprCost { + // The total cost of this expression. 
+ total: ScalarCost, + // The set of instructions that must be evaluated to produce the associated + // expression. + insts: ImmutableEntitySet, +} + +impl Ord for ExprCost { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + self.total.cmp(&other.total) + } +} + +impl PartialOrd for ExprCost { + fn partial_cmp(&self, other: &Self) -> Option { + self.total.partial_cmp(&other.total) + } +} + +impl PartialEq for ExprCost { + fn eq(&self, other: &Self) -> bool { + self.total == other.total + } +} + +impl Eq for ExprCost {} + +impl ExprCost { + /// Create an `ExprCost` with zero total cost and an empty set of + /// instructions. + pub fn zero() -> Self { + Self { + total: ScalarCost::zero(), + insts: ImmutableEntitySet::default(), + } + } + + /// Create the cost for just the given instruction. + pub fn for_inst(dfg: &DataFlowGraph, inst: Inst) -> Self { + Self { + total: ScalarCost::of_opcode(dfg.insts[inst].opcode()), + insts: ImmutableEntitySet::unit(inst), + } + } + + /// Add the other cost into this cost, unioning its set of instructions into + /// this cost's set, and only incrementing the total cost for new + /// instructions. + pub fn add(&mut self, dfg: &DataFlowGraph, other: &Self) { + match (self.insts.len(), other.insts.len()) { + // Nothing to do in this case. + (_, 0) => {} + + // Clone `other` into `self` so that we reuse its set allocations. + (0, _) => { + *self = other.clone(); + } + + // Commute the addition so that we are (a) iterating over the + // smaller of the two sets, and (b) maximizing reuse of existing set + // allocations. 
+ (a, b) if a < b => { + let mut other = other.clone(); + for inst in self.insts.iter() { + if other.insts.insert(inst) { + other.total = other.total + ScalarCost::of_opcode(dfg.insts[inst].opcode()); + } + } + *self = other; + } + + _ => { + for inst in other.insts.iter() { + if self.insts.insert(inst) { + self.total = self.total + ScalarCost::of_opcode(dfg.insts[inst].opcode()); + } + } + } + } + } +} /// A cost of computing some value in the program. /// @@ -31,11 +122,11 @@ use crate::ir::Opcode; /// that cannot be computed, or otherwise serve as a sentinel when /// performing search for the lowest-cost representation of a value. #[derive(Clone, Copy, PartialEq, Eq)] -pub(crate) struct Cost(u32); +pub(crate) struct ScalarCost(u32); -impl core::fmt::Debug for Cost { +impl core::fmt::Debug for ScalarCost { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - if *self == Cost::infinity() { + if *self == ScalarCost::infinity() { write!(f, "Cost::Infinite") } else { f.debug_struct("Cost::Finite") @@ -46,7 +137,7 @@ impl core::fmt::Debug for Cost { } } -impl Ord for Cost { +impl Ord for ScalarCost { #[inline] fn cmp(&self, other: &Self) -> std::cmp::Ordering { // We make sure that the high bits are the op cost and the low bits are @@ -63,38 +154,38 @@ impl Ord for Cost { } } -impl PartialOrd for Cost { +impl PartialOrd for ScalarCost { #[inline] fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl Cost { +impl ScalarCost { const DEPTH_BITS: u8 = 8; const DEPTH_MASK: u32 = (1 << Self::DEPTH_BITS) - 1; const OP_COST_MASK: u32 = !Self::DEPTH_MASK; const MAX_OP_COST: u32 = Self::OP_COST_MASK >> Self::DEPTH_BITS; - pub(crate) fn infinity() -> Cost { + pub(crate) fn infinity() -> ScalarCost { // 2^32 - 1 is, uh, pretty close to infinite... (we use `Cost` // only for heuristics and always saturate so this suffices!) 
- Cost(u32::MAX) + ScalarCost(u32::MAX) } - pub(crate) fn zero() -> Cost { - Cost(0) + pub(crate) fn zero() -> ScalarCost { + ScalarCost(0) } /// Construct a new `Cost` from the given parts. /// /// If the opcode cost is greater than or equal to the maximum representable /// opcode cost, then the resulting `Cost` saturates to infinity. - fn new(opcode_cost: u32, depth: u8) -> Cost { + fn new(opcode_cost: u32, depth: u8) -> ScalarCost { if opcode_cost >= Self::MAX_OP_COST { Self::infinity() } else { - Cost(opcode_cost << Self::DEPTH_BITS | u32::from(depth)) + ScalarCost(opcode_cost << Self::DEPTH_BITS | u32::from(depth)) } } @@ -108,17 +199,17 @@ impl Cost { } /// Return the cost of an opcode. - fn of_opcode(op: Opcode) -> Cost { + pub(crate) fn of_opcode(op: Opcode) -> ScalarCost { match op { // Constants. - Opcode::Iconst | Opcode::F32const | Opcode::F64const => Cost::new(1, 0), + Opcode::Iconst | Opcode::F32const | Opcode::F64const => ScalarCost::new(1, 0), // Extends/reduces. Opcode::Uextend | Opcode::Sextend | Opcode::Ireduce | Opcode::Iconcat - | Opcode::Isplit => Cost::new(1, 0), + | Opcode::Isplit => ScalarCost::new(1, 0), // "Simple" arithmetic. Opcode::Iadd @@ -129,27 +220,27 @@ impl Cost { | Opcode::Bnot | Opcode::Ishl | Opcode::Ushr - | Opcode::Sshr => Cost::new(3, 0), + | Opcode::Sshr => ScalarCost::new(3, 0), // "Expensive" arithmetic. - Opcode::Imul => Cost::new(10, 0), + Opcode::Imul => ScalarCost::new(10, 0), // Everything else. _ => { // By default, be slightly more expensive than "simple" // arithmetic. - let mut c = Cost::new(4, 0); + let mut c = ScalarCost::new(4, 0); // And then get more expensive as the opcode does more side // effects. 
if op.can_trap() || op.other_side_effects() { - c = c + Cost::new(10, 0); + c = c + ScalarCost::new(10, 0); } if op.can_load() { - c = c + Cost::new(20, 0); + c = c + ScalarCost::new(20, 0); } if op.can_store() { - c = c + Cost::new(50, 0); + c = c + ScalarCost::new(50, 0); } c @@ -157,40 +248,32 @@ impl Cost { } } - /// Compute the cost of the operation and its given operands. - /// - /// Caller is responsible for checking that the opcode came from an instruction - /// that satisfies `inst_predicates::is_pure_for_egraph()`. - pub(crate) fn of_pure_op(op: Opcode, operand_costs: impl IntoIterator) -> Self { - let c = Self::of_opcode(op) + operand_costs.into_iter().sum(); - Cost::new(c.op_cost(), c.depth().saturating_add(1)) - } - /// Compute the cost of an operation in the side-effectful skeleton. pub(crate) fn of_skeleton_op(op: Opcode, arity: usize) -> Self { - Cost::of_opcode(op) + Cost::new(u32::try_from(arity).unwrap(), (arity != 0) as _) + ScalarCost::of_opcode(op) + + ScalarCost::new(u32::try_from(arity).unwrap(), (arity != 0) as _) } } -impl std::iter::Sum for Cost { - fn sum>(iter: I) -> Self { +impl std::iter::Sum for ScalarCost { + fn sum>(iter: I) -> Self { iter.fold(Self::zero(), |a, b| a + b) } } -impl std::default::Default for Cost { - fn default() -> Cost { - Cost::zero() +impl std::default::Default for ScalarCost { + fn default() -> ScalarCost { + ScalarCost::zero() } } -impl std::ops::Add for Cost { - type Output = Cost; +impl std::ops::Add for ScalarCost { + type Output = ScalarCost; - fn add(self, other: Cost) -> Cost { + fn add(self, other: ScalarCost) -> ScalarCost { let op_cost = self.op_cost().saturating_add(other.op_cost()); let depth = std::cmp::max(self.depth(), other.depth()); - Cost::new(op_cost, depth) + ScalarCost::new(op_cost, depth) } } @@ -198,41 +281,51 @@ impl std::ops::Add for Cost { mod tests { use super::*; + impl ScalarCost { + fn of_opcode_and_operands( + op: Opcode, + operand_costs: impl IntoIterator, + ) -> Self { + let c = 
Self::of_opcode(op) + operand_costs.into_iter().sum(); + ScalarCost::new(c.op_cost(), c.depth().saturating_add(1)) + } + } + #[test] fn add_cost() { - let a = Cost::new(5, 2); - let b = Cost::new(37, 3); - assert_eq!(a + b, Cost::new(42, 3)); - assert_eq!(b + a, Cost::new(42, 3)); + let a = ScalarCost::new(5, 2); + let b = ScalarCost::new(37, 3); + assert_eq!(a + b, ScalarCost::new(42, 3)); + assert_eq!(b + a, ScalarCost::new(42, 3)); } #[test] fn add_infinity() { - let a = Cost::new(5, 2); - let b = Cost::infinity(); - assert_eq!(a + b, Cost::infinity()); - assert_eq!(b + a, Cost::infinity()); + let a = ScalarCost::new(5, 2); + let b = ScalarCost::infinity(); + assert_eq!(a + b, ScalarCost::infinity()); + assert_eq!(b + a, ScalarCost::infinity()); } #[test] fn op_cost_saturates_to_infinity() { - let a = Cost::new(Cost::MAX_OP_COST - 10, 2); - let b = Cost::new(11, 2); - assert_eq!(a + b, Cost::infinity()); - assert_eq!(b + a, Cost::infinity()); + let a = ScalarCost::new(ScalarCost::MAX_OP_COST - 10, 2); + let b = ScalarCost::new(11, 2); + assert_eq!(a + b, ScalarCost::infinity()); + assert_eq!(b + a, ScalarCost::infinity()); } #[test] fn depth_saturates_to_max_depth() { - let a = Cost::new(10, u8::MAX); - let b = Cost::new(10, 1); + let a = ScalarCost::new(10, u8::MAX); + let b = ScalarCost::new(10, 1); assert_eq!( - Cost::of_pure_op(Opcode::Iconst, [a, b]), - Cost::new(21, u8::MAX) + ScalarCost::of_opcode_and_operands(Opcode::Iconst, [a, b]), + ScalarCost::new(21, u8::MAX) ); assert_eq!( - Cost::of_pure_op(Opcode::Iconst, [b, a]), - Cost::new(21, u8::MAX) + ScalarCost::of_opcode_and_operands(Opcode::Iconst, [b, a]), + ScalarCost::new(21, u8::MAX) ); } } diff --git a/cranelift/codegen/src/egraph/elaborate.rs b/cranelift/codegen/src/egraph/elaborate.rs index d9d156eb41c8..4ff9ee997970 100644 --- a/cranelift/codegen/src/egraph/elaborate.rs +++ b/cranelift/codegen/src/egraph/elaborate.rs @@ -2,7 +2,7 @@ //! in CFG nodes. 
use super::Stats; -use super::cost::Cost; +use super::cost::ExprCost; use crate::ctxhash::NullCtx; use crate::dominator_tree::DominatorTree; use crate::hash_map::Entry as HashEntry; @@ -71,8 +71,8 @@ pub(crate) struct Elaborator<'a> { ctrl_plane: &'a mut ControlPlane, } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -struct BestEntry(Cost, Value); +#[derive(Clone, Debug, PartialEq, Eq)] +struct BestEntry(ExprCost, Value); impl PartialOrd for BestEntry { fn partial_cmp(&self, other: &Self) -> Option { @@ -148,7 +148,7 @@ impl<'a> Elaborator<'a> { ) -> Self { let num_values = func.dfg.num_values(); let mut value_to_best_value = - SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value())); + SecondaryMap::with_default(BestEntry(ExprCost::zero(), Value::reserved_value())); value_to_best_value.resize(num_values); Self { func, @@ -321,9 +321,9 @@ impl<'a> Elaborator<'a> { debug_assert!(!best[x].1.is_reserved_value()); debug_assert!(!best[y].1.is_reserved_value()); best[value] = if use_worst { - std::cmp::max(best[x], best[y]) + std::cmp::max(&best[x], &best[y]).clone() } else { - std::cmp::min(best[x], best[y]) + std::cmp::min(&best[x], &best[y]).clone() }; trace!( " -> best of union({:?}, {:?}) = {:?}", @@ -332,7 +332,7 @@ impl<'a> Elaborator<'a> { } ValueDef::Param(_, _) => { - best[value] = BestEntry(Cost::zero(), value); + best[value] = BestEntry(ExprCost::zero(), value); } // If the Inst is inserted into the layout (which is, @@ -341,21 +341,20 @@ impl<'a> Elaborator<'a> { // cost. ValueDef::Result(inst, _) => { if let Some(_) = self.func.layout.inst_block(inst) { - best[value] = BestEntry(Cost::zero(), value); + best[value] = BestEntry(ExprCost::zero(), value); } else { - let inst_data = &self.func.dfg.insts[inst]; // N.B.: at this point we know that the opcode is // pure, so `pure_op_cost`'s precondition is // satisfied. 
- let cost = Cost::of_pure_op( - inst_data.opcode(), - self.func.dfg.inst_values(inst).map(|value| { - debug_assert!(!best[value].1.is_reserved_value()); - best[value].0 - }), - ); - best[value] = BestEntry(cost, value); + let mut cost = ExprCost::for_inst(&self.func.dfg, inst); + + for val in self.func.dfg.inst_values(inst) { + let BestEntry(val_cost, _val) = &best[val]; + cost.add(&self.func.dfg, val_cost); + } + trace!(" -> cost of value {} = {:?}", value, cost); + best[value] = BestEntry(cost, value); } } }; @@ -680,7 +679,7 @@ impl<'a> Elaborator<'a> { value: new_result, in_block: insert_block, }; - let best_result = self.value_to_best_value[result]; + let best_result = &self.value_to_best_value[result]; self.value_to_elaborated_value.insert_if_absent_with_depth( &NullCtx, best_result.1, @@ -688,7 +687,7 @@ impl<'a> Elaborator<'a> { scope_depth, ); - self.value_to_best_value[new_result] = best_result; + self.value_to_best_value[new_result] = best_result.clone(); trace!( " -> cloned inst has new result {} for orig {}", @@ -706,7 +705,7 @@ impl<'a> Elaborator<'a> { value: result, in_block: insert_block, }; - let best_result = self.value_to_best_value[result]; + let best_result = &self.value_to_best_value[result]; self.value_to_elaborated_value.insert_if_absent_with_depth( &NullCtx, best_result.1, @@ -801,7 +800,7 @@ impl<'a> Elaborator<'a> { // map now. 
for &result in self.func.dfg.inst_results(inst) { trace!(" -> result {}", result); - let best_result = self.value_to_best_value[result]; + let best_result = &self.value_to_best_value[result]; self.value_to_elaborated_value.insert_if_absent( &NullCtx, best_result.1, diff --git a/cranelift/entity/Cargo.toml b/cranelift/entity/Cargo.toml index 31041d4377bd..e87022a8699a 100644 --- a/cranelift/entity/Cargo.toml +++ b/cranelift/entity/Cargo.toml @@ -17,6 +17,7 @@ workspace = true [dependencies] cranelift-bitset = { workspace=true } +im-rc = "15.1.0" serde = { workspace = true, optional = true } serde_derive = { workspace = true, optional = true } diff --git a/cranelift/entity/src/imm_set.rs b/cranelift/entity/src/imm_set.rs new file mode 100644 index 000000000000..c450dcc5c086 --- /dev/null +++ b/cranelift/entity/src/imm_set.rs @@ -0,0 +1,255 @@ +//! Immutable entity sets. + +use super::EntityRef; +use core::{fmt, marker::PhantomData, mem}; +use cranelift_bitset::ScalarBitSet; + +/// An immutable, persistent version of an [`EntitySet`][crate::EntitySet]. 
+#[derive(Clone)] +pub struct ImmutableEntitySet { + words: im_rc::OrdMap>, + len: u32, + _phantom: PhantomData, +} + +impl Default for ImmutableEntitySet { + fn default() -> Self { + Self { + words: Default::default(), + len: 0, + _phantom: Default::default(), + } + } +} + +impl fmt::Debug for ImmutableEntitySet { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_set().entries(self.iter()).finish() + } +} + +impl ImmutableEntitySet +where + K: EntityRef, +{ + const BITS_PER_WORD: usize = mem::size_of::() * 8; + + #[inline] + fn word_and_bit(key: K) -> (u32, u8) { + let key_index = key.index(); + let bit = key_index % Self::BITS_PER_WORD; + let word = key_index / Self::BITS_PER_WORD; + (u32::try_from(word).unwrap(), u8::try_from(bit).unwrap()) + } + + #[inline] + fn key_from_word_and_bit(word: u32, bit: u8) -> K { + let word = usize::try_from(word).unwrap(); + let bit = usize::from(bit); + K::new(word * Self::BITS_PER_WORD + bit) + } + + /// Create a set containing just the given key. + #[inline] + pub fn unit(key: K) -> Self { + let (word, bit) = Self::word_and_bit(key); + let mut bitset = ScalarBitSet::new(); + bitset.insert(bit); + ImmutableEntitySet { + words: im_rc::OrdMap::unit(word, bitset), + len: 1, + _phantom: PhantomData, + } + } + + /// Insert a new key into this set. + /// + /// Returns `true` if the set did not previously contain the key, `false` + /// otherwise. + #[inline] + pub fn insert(&mut self, key: K) -> bool { + let (word, bit) = Self::word_and_bit(key); + let bitset = self.words.entry(word).or_default(); + let is_new = bitset.insert(bit); + self.len += u32::from(is_new); + is_new + } + + /// Does this set contain the given key? + #[inline] + pub fn contains(&self, key: K) -> bool { + let (word, bit) = Self::word_and_bit(key); + self.words.get(&word).is_some_and(|bits| bits.contains(bit)) + } + + /// Get the number of elements in this set. 
+ #[inline] + pub fn len(&self) -> usize { + usize::try_from(self.len).unwrap() + } + + /// Iterate over the keys in this set, in order. + #[inline] + pub fn iter(&self) -> ImmutableEntitySetIter<'_, K> { + ImmutableEntitySetIter { + words: self.words.iter(), + word_and_bits: None, + _phantom: PhantomData, + } + } +} + +/// An iterator over the entries in an [`ImmutableEntitySet`]. +pub struct ImmutableEntitySetIter<'a, K> { + words: im_rc::ordmap::Iter<'a, u32, ScalarBitSet>, + word_and_bits: Option<(u32, cranelift_bitset::scalar::Iter)>, + _phantom: PhantomData, +} + +impl Iterator for ImmutableEntitySetIter<'_, K> +where + K: EntityRef, +{ + type Item = K; + + #[inline] + fn next(&mut self) -> Option { + loop { + let (word, bits) = { + if self.word_and_bits.is_none() { + let (&word, bits) = self.words.next()?; + self.word_and_bits = Some((word, bits.iter())); + } + // Safety: we replaced `None` with `Some` just above. + unsafe { self.word_and_bits.as_mut().unwrap_unchecked() } + }; + + let Some(bit) = bits.next() else { + self.word_and_bits = None; + continue; + }; + + return Some(ImmutableEntitySet::key_from_word_and_bit(*word, bit)); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use alloc::vec::Vec; + + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] + struct Key(u32); + crate::entity_impl!(Key); + + #[test] + fn smoke_test() { + let mut set = ImmutableEntitySet::default(); + + for i in 0..100 { + let is_new = set.insert(Key::new(i)); + assert!(is_new); + } + for i in 0..100 { + let is_new = set.insert(Key::new(i)); + assert!(!is_new); + } + + for i in 900..1000 { + let is_new = set.insert(Key::new(i)); + assert!(is_new); + } + for i in 900..1000 { + let is_new = set.insert(Key::new(i)); + assert!(!is_new); + } + + for i in u32::MAX - 100..u32::MAX { + let i = usize::try_from(i).unwrap(); + let is_new = set.insert(Key::new(i)); + assert!(is_new); + } + for i in u32::MAX - 100..u32::MAX { + let i = usize::try_from(i).unwrap(); + let 
is_new = set.insert(Key::new(i)); + assert!(!is_new); + } + + for i in 0..100 { + assert!(set.contains(Key::new(i))); + } + for i in 100..200 { + assert!(!set.contains(Key::new(i))); + } + + for i in 800..900 { + assert!(!set.contains(Key::new(i))); + } + for i in 900..1000 { + assert!(set.contains(Key::new(i))); + } + for i in 1000..1100 { + assert!(!set.contains(Key::new(i))); + } + + for i in u32::MAX - 200..u32::MAX - 100 { + let i = usize::try_from(i).unwrap(); + assert!(!set.contains(Key::new(i))); + } + for i in u32::MAX - 100..u32::MAX { + let i = usize::try_from(i).unwrap(); + assert!(set.contains(Key::new(i))); + } + + assert_eq!(set.len(), 300); + assert_eq!(set.iter().count(), 300); + for k in set.iter() { + assert!(set.contains(k)); + } + } + + #[test] + fn unit() { + let set = ImmutableEntitySet::unit(Key::new(42)); + + assert!(set.contains(Key::new(42))); + + assert!(!set.contains(Key::new(0))); + assert!(!set.contains(Key::new(41))); + assert!(!set.contains(Key::new(43))); + + assert_eq!(set.iter().collect::>(), [Key::new(42)]); + } + + #[test] + fn iter() { + let mut set = ImmutableEntitySet::default(); + set.insert(Key::new(0)); + set.insert(Key::new(1)); + set.insert(Key::new(2)); + set.insert(Key::new(31)); + set.insert(Key::new(32)); + set.insert(Key::new(33)); + set.insert(Key::new(63)); + set.insert(Key::new(64)); + set.insert(Key::new(65)); + set.insert(Key::new(usize::try_from(u32::MAX - 1).unwrap())); + + assert_eq!( + set.iter().collect::>(), + [ + Key::new(0), + Key::new(1), + Key::new(2), + Key::new(31), + Key::new(32), + Key::new(33), + Key::new(63), + Key::new(64), + Key::new(65), + Key::new(usize::try_from(u32::MAX - 1).unwrap()), + ] + ); + } +} diff --git a/cranelift/entity/src/lib.rs b/cranelift/entity/src/lib.rs index f412b277bea1..8ebce4a049d9 100644 --- a/cranelift/entity/src/lib.rs +++ b/cranelift/entity/src/lib.rs @@ -271,6 +271,7 @@ macro_rules! 
entity_impl { pub mod packed_option; mod boxed_slice; +mod imm_set; mod iter; mod keys; mod list; @@ -280,6 +281,7 @@ mod set; mod sparse; pub use self::boxed_slice::BoxedSlice; +pub use self::imm_set::{ImmutableEntitySet, ImmutableEntitySetIter}; pub use self::iter::{Iter, IterMut}; pub use self::keys::Keys; pub use self::list::{EntityList, ListPool}; diff --git a/cranelift/filetests/filetests/egraph/cost-function.clif b/cranelift/filetests/filetests/egraph/cost-function.clif new file mode 100644 index 000000000000..a8fc15bbc15f --- /dev/null +++ b/cranelift/filetests/filetests/egraph/cost-function.clif @@ -0,0 +1,104 @@ +;; Egraph extraction and cost function edge cases. + +test optimize precise-output +set opt_level=speed_and_size +target x86_64 + +function %f(i64) -> i64 { + block0(v0: i64): + v1 = iadd v0, v0 + v2 = iadd v1, v1 + v3 = iadd v2, v2 + v4 = iadd v3, v3 + v5 = iadd v4, v4 + v6 = iadd v5, v5 + v7 = iadd v6, v6 + v8 = iadd v7, v7 + v9 = iadd v8, v8 + v10 = iadd v9, v9 + v11 = iadd v10, v10 + v12 = iadd v11, v11 + v13 = iadd v12, v12 + v14 = iadd v13, v13 + v15 = iadd v14, v14 + v16 = iadd v15, v15 + v17 = iadd v16, v16 + v18 = iadd v17, v17 + v19 = iadd v18, v18 + v20 = iadd v19, v19 + v21 = iadd v20, v20 + v22 = iadd v21, v21 + v23 = iadd v22, v22 + v24 = iadd v23, v23 + v25 = iadd v24, v24 + v26 = iadd v25, v25 + v27 = iadd v26, v26 + v28 = iadd v27, v27 + v29 = iadd v28, v28 + v30 = iadd v29, v29 + v31 = iadd v30, v30 + v32 = iadd v31, v31 + v33 = iadd v32, v32 + + ;; When a cost function doesn't "understand" shared structure and + ;; that expressions are DAGs, not trees -- for example, it just does + ;; something like + ;; + ;; cost(v) = opcode_cost(v) + sum(cost(u) for u in operands(inst(v))) + ;; + ;; like our old cost function did -- then `v33` should now have infinite + ;; cost at this point. 
+ ;; + ;; Now we append a little identity function to the end, which the + ;; optimizer should see through via the rules `x * 2 => x + x` followed + ;; by `(x + y) - y => x` and ultimately recognize that `v36` is the same + ;; as `v33`. However, if `cost(x)` saturated to infinity, because of + ;; poor cost functions that don't account for shared structure, then it + ;; is not actually true that `cost(x) < cost(identity(x))`, and we can + ;; fail to boil away the identity function. This test checks that we + ;; don't do that. + + v34 = iconst.i64 2 + v35 = imul v33, v34 + v36 = isub v35, v33 + return v36 +} + +; function %f(i64) -> i64 fast { +; block0(v0: i64): +; v1 = iadd v0, v0 +; v2 = iadd v1, v1 +; v3 = iadd v2, v2 +; v4 = iadd v3, v3 +; v5 = iadd v4, v4 +; v6 = iadd v5, v5 +; v7 = iadd v6, v6 +; v8 = iadd v7, v7 +; v9 = iadd v8, v8 +; v10 = iadd v9, v9 +; v11 = iadd v10, v10 +; v12 = iadd v11, v11 +; v13 = iadd v12, v12 +; v14 = iadd v13, v13 +; v15 = iadd v14, v14 +; v16 = iadd v15, v15 +; v17 = iadd v16, v16 +; v18 = iadd v17, v17 +; v19 = iadd v18, v18 +; v20 = iadd v19, v19 +; v21 = iadd v20, v20 +; v22 = iadd v21, v21 +; v23 = iadd v22, v22 +; v24 = iadd v23, v23 +; v25 = iadd v24, v24 +; v26 = iadd v25, v25 +; v27 = iadd v26, v26 +; v28 = iadd v27, v27 +; v29 = iadd v28, v28 +; v30 = iadd v29, v29 +; v31 = iadd v30, v30 +; v32 = iadd v31, v31 +; v33 = iadd v32, v32 +; return v33 +; } +