diff --git a/Cargo.lock b/Cargo.lock index 191e5a8..0221b1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -85,6 +85,31 @@ dependencies = [ "winapi", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + [[package]] name = "dot-writer" version = "0.1.3" @@ -169,7 +194,9 @@ version = "0.3.6" dependencies = [ "annotate-snippets", "clap", + "itertools", "metamath-rs", + "rayon", "simple_logger", ] @@ -199,6 +226,26 @@ dependencies = [ "libc", ] +[[package]] +name = "rayon" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.2.16" diff --git a/metamath-knife/Cargo.toml b/metamath-knife/Cargo.toml index ae205ba..ad39022 100644 --- a/metamath-knife/Cargo.toml +++ b/metamath-knife/Cargo.toml @@ -12,6 +12,8 @@ edition = "2021" [dependencies] clap = "2.33" +itertools = "0.12.0" +rayon = "1.8.1" simple_logger = "1.13" annotate-snippets = "0.9" metamath-rs = { path = "../metamath-rs" } diff --git a/metamath-knife/src/main.rs b/metamath-knife/src/main.rs index 920aba1..c8fe8c6 100644 --- a/metamath-knife/src/main.rs +++ b/metamath-knife/src/main.rs @@ -17,6 +17,9 @@ use std::mem; use std::str::FromStr; use std::sync::Arc; +mod minimizer; +use minimizer::minimize; + fn positive_integer(val: String) -> Result<(), String> { u32::from_str(&val).map(|_| ()).map_err(|e| format!("{e}")) } @@ -53,6 +56,7 @@ fn main() { (@arg repeat: --repeat "Demonstrates incremental verifier") (@arg jobs: -j --jobs +takes_value validator(positive_integer) "Number of threads to use for verification") + (@arg minimize: -M --minimize [LABEL] ... "Attempts to minimize the given theorem") (@arg export: -e --export [LABEL] ... "Outputs a proof file") (@arg biblio: --biblio [FILE] ... "Supplies a bibliography file for verify-markup\n\ Can be used one or two times; the second is for exthtml processing") @@ -82,7 +86,8 @@ fn main() { || matches.is_present("verify_parse_stmt") || matches.is_present("export_grammar_dot") || matches.is_present("dump_grammar") - || matches.is_present("dump_formula"), + || matches.is_present("dump_formula") + || matches.is_present("minimize"), jobs: usize::from_str(matches.value_of("jobs").unwrap_or("1")) .expect("validator should check this"), }; @@ -245,6 +250,14 @@ fn main() { db.print_typesetting(); } + if let Some(exps) = matches.values_of_lossy("minimize") { + db.stmt_parse_pass(); + db.verify_usage_pass(); + for label in exps { + minimize(&db, &label); + } + } + if let Some(exps) = matches.values_of_lossy("export") { db.scope_pass(); for file in exps { diff --git a/metamath-knife/src/minimizer.rs b/metamath-knife/src/minimizer.rs new file mode 100644 index 0000000..b53af6c --- /dev/null +++ b/metamath-knife/src/minimizer.rs @@ -0,0 +1,367 @@ +use std::{ + cmp::Ordering, + collections::{HashMap, HashSet}, + sync::Arc, + time::Instant, +}; + +use itertools::Itertools; +use metamath_rs::{ + as_str, axiom_use::AxiomUse, formula::Substitutions, nameck::Nameset, proof::ProofTreeArray, + scopeck::Frame, Database, Formula, Label, StatementRef, +}; +use rayon::iter::{ParallelBridge, ParallelIterator}; + +/// One step of a minimized proof +struct MinimizedStep { + apply: Label, + hyps: Box<[usize]>, + result: Formula, + substitutions: Box, +} + +impl MinimizedStep { + fn add_to_proof_tree_array( + &self, + stack_buffer: &mut Vec, + arr: &mut ProofTreeArray, + logical_steps_idx: &[Option], + db: &Database, + ) -> Option { + let mand_hyps: Vec<_> = self + .hyps + .iter() + .map(|step_idx| logical_steps_idx[*step_idx].unwrap()) + .collect(); + db.build_proof_step( + self.apply, + &self.result, + &mand_hyps.into_boxed_slice(), + &self.substitutions, + stack_buffer, + arr, + ) + } + + /// Attempt to identify best candidate for minimization among two minimized steps. + /// Currently returns the step with + #[inline] + fn _heuristic(&self, _step_usage: &[usize]) -> usize { + let mut l = 0; + for h in self.hyps.iter() { + l += if *h < 20 { + 1 + } else if *h < 120 { + 2 + } else if *h < 620 { + 3 + } else { + 4 + }; + } + l + } + + fn compare_with(&self, other: &MinimizedStep) -> Ordering { + // match self.heuristic(step_usage).cmp(&other.heuristic(step_usage)) { + match self.hyps.cmp(&other.hyps) { + Ordering::Equal => {} + ord => return ord, + } + self.apply.cmp(&other.apply) + } +} + +/// Attempt to minimize a given statement. +/// Requires: [`Database::stmt_parse_pass`] and [`Database::verify_usage_pass`] +pub fn minimize(db: &Database, label_str: &str) { + let now = Instant::now(); + let mut out = std::io::stdout(); + let names = db.name_result(); + let sref: metamath_rs::StatementRef<'_> = + db.statement(label_str.as_bytes()).unwrap_or_else(|| { + panic!("Label {label_str} does not correspond to an existing statement"); + }); + let label = names.get_atom(sref.label()); + let original_proof_tree = db.get_proof_tree(sref).unwrap_or_else(|| { + panic!("Could not get original proof tree for label {label_str}"); + }); + + // We build a hashtable with all theorems of the database, indexed by their top node + // Some theorems, like `mp4an`, have a variable as their top node. + // Those are stored in `theorem_rest` and will be checked every time. + let mut theorem_hash = HashMap::new(); + let mut theorem_rest = vec![]; + let axiom_usage = db.try_usage_result().unwrap(); + let current_axiom_usage = axiom_usage.get_axiom_use(&sref).unwrap_or_else(|| { + panic!("Theorem {label_str} does not have an axiom usage"); + }); + let provable_typecode = db.grammar_result().provable_typecode(); + let provable_typecode_token = names.atom_name(provable_typecode); + for theorem in db.statements_range(..label).filter(|s| { + s.is_assertion() + && s.math_at(0).slice == provable_typecode_token + && !s.discouragements().usage_discouraged + && axiom_usage + .get_axiom_use(s) + .unwrap_or(&AxiomUse::default()) + .compatible_with(current_axiom_usage) + }) { + let formula = db.stmt_parse_result().get_formula(&theorem).unwrap(); + if formula.get_typecode() == provable_typecode { + if formula.is_variable_by_path(&[]).unwrap() { + theorem_rest.push(theorem); + } else { + theorem_hash + .entry(formula.get_by_path(&[]).unwrap()) + .or_insert(vec![]) + .push(theorem); + } + } + } + + // Prepare the list of formulas for each logical step. + let formulas = original_proof_tree.with_logical_steps(db, |cur, _ix, _stmt, _hyps| { + proof_step_formula(db, &original_proof_tree, cur, true) + }); + + // Prepare the proof array with the essential hypotheses + let mut proof_tree = ProofTreeArray::default(); + let mut stack_buffer = vec![]; + let mut logical_steps_idx = vec![]; // For each logical step, stores the corresponding index in the full proof tree array + let frame = db.get_frame(label).unwrap(); + for (label, formula) in frame.essentials() { + logical_steps_idx.push(db.build_proof_hyp( + label, + formula, + &mut stack_buffer, + &mut proof_tree, + )); + } + let essentials_count = logical_steps_idx.len(); + + let dv = labelled_dv(&frame, names); + let mand_vars = frame + .mandatory_vars() + .iter() + .map(|symbol_atom| names.lookup_float_by_atom(*symbol_atom).statement_atom) + .collect::>(); + + // Iterate through each logical step, and attempt to minimize. + let mut minimized_steps = vec![]; + for step_formulas in prefixes_asc(formulas.as_slice()).skip(essentials_count + 1) { + let (target_formula, previous_formulas) = step_formulas.split_last().unwrap(); + println!( + "Working on step {}: {}...", + step_formulas.len(), + target_formula.as_ref(db) + ); + let step = theorem_hash + .get(&target_formula.get_by_path(&[]).unwrap()) + .unwrap() + .iter() + .chain(theorem_rest.iter()) + .par_bridge() + .filter_map(|theorem| { + match_theorem( + db, + theorem, + target_formula, + previous_formulas, + &dv, + &mand_vars, + ) + }) + .min_by(|a, b| a.compare_with(b)) + .unwrap_or_else(|| { + panic!( + "No minimization found for formula {}", + target_formula.as_ref(db) + ) + }); + minimized_steps.push(step); + } + + // Identify which steps have actually been used: start with the qed step and accumulate all dependents + let mut used_steps = vec![formulas.len() - 1]; + let mut i = 0; + while i < used_steps.len() { + if used_steps[i] > essentials_count { + used_steps.extend_from_slice(&minimized_steps[used_steps[i] - essentials_count].hyps); + } + i += 1; + } + let used_steps: HashSet = HashSet::from_iter(used_steps); + + // Finally build the rest of the proof with the minimized steps + for (idx, step) in minimized_steps.iter().enumerate() { + if idx < essentials_count || used_steps.contains(&(essentials_count + idx)) { + logical_steps_idx.push(step.add_to_proof_tree_array( + &mut stack_buffer, + &mut proof_tree, + &logical_steps_idx, + db, + )); + } else { + logical_steps_idx.push(None); + } + } + proof_tree.qed = minimized_steps + .last() + .unwrap() + .add_to_proof_tree_array(&mut stack_buffer, &mut proof_tree, &logical_steps_idx, db) + .unwrap(); + + // Display the generated proof tree + proof_tree.calc_indent(); + let _ = db.export_mmp_proof_tree(sref.label(), &proof_tree, &mut out); + println!( + "minimize {} {}ms", + label_str, + (now.elapsed() * 1000).as_secs() + ); +} + +/// Attempt to match the candidate theorem, provided the known step formulas. +fn match_theorem( + db: &Database, + candidate_theorem: &StatementRef<'_>, + target_formula: &Formula, + step_formulas: &[Formula], + dv: &[(Label, Label)], + mand_vars: &[Label], +) -> Option { + let names = db.name_result(); + let mut substitutions = Substitutions::default(); + let theorem_formula = db.stmt_parse_result().get_formula(candidate_theorem)?; + target_formula + .unify(theorem_formula, &mut substitutions) + .ok()?; + + // Found a theorem which *might* be applied, now we check the hypotheses + let theorem_label = names.lookup_label(candidate_theorem.label())?.atom; + let frame = db.get_frame(theorem_label)?; + let essentials: Vec<_> = frame.essentials().collect(); + if essentials.is_empty() { + let substitutions = Box::new(substitutions); + let step_dv = labelled_dv(&frame, names); + check_dv_constraints(dv, mand_vars, &step_dv, &substitutions)?; // This can be tested with ~hbalw/ax-5 + + // No hypoteses, we're done! + Some(MinimizedStep { + apply: theorem_label, + hyps: Box::new([]), + result: target_formula.clone(), + substitutions, + }) + } else { + // Iterate over all possible combination of steps, for each formula. + essentials + .iter() + .map(|(_, hyp_fmla)| { + step_formulas + .iter() + .enumerate() + .filter_map(|(step_idx, step_fmla)| { + let mut subst = substitutions.clone(); + step_fmla + .unify(hyp_fmla, &mut subst) + .map(|()| (step_idx, subst)) + .ok() + }) + }) + .multi_cartesian_product() + .find_map(|hyp_match| { + let mut hyps = vec![]; + let mut subst = substitutions.clone(); + for (step_idx, step_subst) in hyp_match { + subst.extend(&step_subst).ok()?; + hyps.push(step_idx); + } + + let step_dv = labelled_dv(&frame, names); + check_dv_constraints(dv, mand_vars, &step_dv, &subst)?; // This can be tested with ~hbalw/ax-5 + Some(MinimizedStep { + apply: theorem_label, + hyps: hyps.into_boxed_slice(), + result: target_formula.clone(), + substitutions: Box::new(subst), + }) + }) + } +} + +/// Verifies that the dv constraints are not violated. +fn check_dv_constraints( + main_dv: &[(Label, Label)], + mand_vars: &[Label], + step_dv: &[(Label, Label)], + subst: &Substitutions, +) -> Option<()> { + for (l1, l2) in step_dv { + if let Some(fmla1) = subst.get(*l1) { + if let Some(fmla2) = subst.get(*l2) { + fmla1 + .variable_iter() + .cartesian_product(fmla2.variable_iter()) + .all(|(a1, a2)| { + !mand_vars.contains(&a1) + || !mand_vars.contains(&a2) + || main_dv.contains(&(a1, a2)) + || main_dv.contains(&(a2, a1)) + }) + .then_some(())?; + } + } + } + Some(()) +} + +/// Convert a proof step in a [ProofTreeArray] to the corresponding [Formula]. +pub fn proof_step_formula( + database: &Database, + proof_tree: &ProofTreeArray, + tree_index: usize, + use_provables: bool, +) -> Formula { + let formula_string = String::from_utf8_lossy(&proof_tree.exprs().unwrap()[tree_index]); + let nset = database.name_result(); + let grammar = database.grammar_result(); + let typecodes = if use_provables { + Box::new([grammar.provable_typecode()]) + } else { + grammar.typecodes() + }; + typecodes + .iter() + .find_map(|tc| { + grammar + .parse_string( + format!("{} {}", as_str(nset.atom_name(*tc)), formula_string.trim()).as_str(), + nset, + ) + .ok() + }) + .unwrap_or_else(|| panic!("Could not parse formula: {}", formula_string)) +} + +/// Extract the frame'd disjoint variable conditions in terms of labels. +fn labelled_dv(frame: &Frame, names: &Arc) -> Vec<(Label, Label)> { + let mand_vars = frame.mandatory_vars(); + frame + .mandatory_dv + .iter() + .map(|(i, j)| { + ( + names.lookup_float_by_atom(mand_vars[*i]).statement_atom, + names.lookup_float_by_atom(mand_vars[*j]).statement_atom, + ) + }) + .collect() +} + +/// Utility function to iterate prefixes of slices +/// See [https://stackoverflow.com/questions/68837763/how-to-iterate-prefixes-or-suffixes-of-vec-or-slice-in-rust] +pub fn prefixes_asc(slice: &[T]) -> impl Iterator + DoubleEndedIterator { + (0..=slice.len()).map(move |len| &slice[..len]) +} diff --git a/metamath-rs/src/axiom_use.rs b/metamath-rs/src/axiom_use.rs index e1a76a9..a50d79b 100644 --- a/metamath-rs/src/axiom_use.rs +++ b/metamath-rs/src/axiom_use.rs @@ -5,30 +5,51 @@ use itertools::PeekingNext; use crate::bit_set::Bitset; use crate::diag::Diagnostic; +use crate::nameck::Nameset; use crate::segment::SegmentRef; use crate::segment_set::SegmentSet; use crate::statement::{CommandToken, StatementAddress, TokenPtr}; use crate::util::HashMap; -use crate::StatementType; use crate::{as_str, database::time, Database}; +use crate::{StatementRef, StatementType}; -/// Diagnostics issued when checking axiom usage in the Database. -/// +/// Describes the axioms used by a given theorem. +pub type AxiomUse = Bitset; + +impl AxiomUse { + /// Returns whether this axiom use is compatible with the provided one, + /// i.e. whether is uses the same axioms or fewer. + #[inline] + pub fn compatible_with(&self, other: &AxiomUse) -> bool { + self.is_subset(other) + } +} + +/// Axiom dependencies +/// Including diagnostics issued when checking axiom usage in the Database. #[derive(Debug, Default)] -pub struct UsageResult(Vec<(StatementAddress, Diagnostic)>); +pub struct UsageResult { + axiom_use_map: HashMap, + diagnostics: Vec<(StatementAddress, Diagnostic)>, +} impl UsageResult { /// Returns the list of errors that were generated during the usage check pass. #[must_use] pub fn diagnostics(&self) -> Vec<(StatementAddress, Diagnostic)> { - self.0.clone() + self.diagnostics.clone() + } + + /// Returns the axioms used by a given statement + #[must_use] + pub fn get_axiom_use(&self, stmt: &StatementRef<'_>) -> Option<&'_ AxiomUse> { + self.axiom_use_map.get(&stmt.address()) } } struct UsagePass<'a> { axiom_use_map: HashMap, Bitset>, axioms: Vec>, - result: &'a mut UsageResult, } impl<'a> UsagePass<'a> { @@ -70,7 +91,13 @@ impl<'a> UsagePass<'a> { } /// Verify that all usage declarations are correct. - fn verify_usage(&'a mut self, sset: &'a SegmentSet) { + fn verify_usage<'b>( + &mut self, + sset: &'b SegmentSet, + diagnostics: &mut Vec<(StatementAddress, Diagnostic)>, + ) where + 'b: 'a, + { for sref in sset.segments(..) { let mut j_commands: std::slice::Iter<'_, (i32, (crate::Span, Vec))> = sref.j_commands.iter(); @@ -82,7 +109,7 @@ impl<'a> UsagePass<'a> { { if let Err(diags) = self.parse_command(sref, args) { for diag in diags { - self.result.0.push((stmt.address(), diag)); + diagnostics.push((stmt.address(), diag)); } } } @@ -123,13 +150,18 @@ impl<'a> UsagePass<'a> { } /// Verify the axiom usage -pub(crate) fn verify_usage(sset: &Arc, usage: &mut UsageResult) { - UsagePass { +pub(crate) fn verify_usage(sset: &Arc, names: &Arc, usage: &mut UsageResult) { + let mut pass = UsagePass { axiom_use_map: HashMap::default(), axioms: vec![], - result: usage, + }; + + pass.verify_usage(sset, &mut usage.diagnostics); + + for (label, u) in pass.axiom_use_map.drain() { + let address = names.lookup_label(label).unwrap().address; + usage.axiom_use_map.insert(address, u); } - .verify_usage(sset); } impl Database { diff --git a/metamath-rs/src/bit_set.rs b/metamath-rs/src/bit_set.rs index d2abd4a..df1ea45 100644 --- a/metamath-rs/src/bit_set.rs +++ b/metamath-rs/src/bit_set.rs @@ -129,6 +129,22 @@ impl Bitset { } } + /// Returns true if this bitset is a subset of the other bitset. + pub fn is_subset(&self, other: &Bitset) -> bool { + (!other.head & self.head == 0) + && match (&self.tail, &other.tail) { + (Some(my_tail), Some(other_tail)) => { + my_tail + .iter() + .zip(other_tail.iter()) + .all(|(&lword, &rword)| !rword & lword == 0) + && other_tail.iter().skip(my_tail.len()).all(|&word| word == 0) + } + (None, Some(rtail)) => rtail.iter().all(|&word| word == 0), + _ => true, + } + } + /// Returns an iterator over the indices of set bits in the bitset. pub fn iter(&self) -> BitsetIter<'_> { self.into_iter() @@ -164,7 +180,7 @@ impl<'a> IntoIterator for &'a Bitset { } /// Iterator for set bits in a bitset. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct BitsetIter<'a> { bits: usize, offset: usize, diff --git a/metamath-rs/src/database.rs b/metamath-rs/src/database.rs index a80a998..77fd731 100644 --- a/metamath-rs/src/database.rs +++ b/metamath-rs/src/database.rs @@ -631,10 +631,12 @@ impl Database { /// than error diagnostics. It does not save any parsed proof data. pub fn verify_usage_pass(&mut self) -> &Arc { if self.usage.is_none() { + self.name_pass(); time(&self.options.clone(), "usage", || { let parse = self.parse_result(); + let names = self.name_result(); let mut usage = UsageResult::default(); - crate::axiom_use::verify_usage(parse, &mut usage); + crate::axiom_use::verify_usage(parse, names, &mut usage); self.usage = Some(Arc::new(usage)); }); } diff --git a/metamath-rs/src/export.rs b/metamath-rs/src/export.rs index 53cbb34..994856e 100644 --- a/metamath-rs/src/export.rs +++ b/metamath-rs/src/export.rs @@ -221,8 +221,10 @@ impl Database { .exprs() .expect("exporting MMP requires expressions enabled in the ProofTreeArray"); for &mut (cur, ref mut line) in &mut lines { - for _ in 0..(spaces + indent[cur] - line.len() as u16) { - line.push(' ') + if indent[cur] < u16::MAX { + for _ in 0..(spaces + indent[cur] - line.len() as u16) { + line.push(' ') + } } line.push_str(str::from_utf8(tc).unwrap()); line.push_str(&String::from_utf8_lossy(&exprs[cur])); diff --git a/metamath-rs/src/formula.rs b/metamath-rs/src/formula.rs index 4801c44..e882a98 100644 --- a/metamath-rs/src/formula.rs +++ b/metamath-rs/src/formula.rs @@ -37,6 +37,7 @@ use std::collections::hash_map::Iter; use std::fmt::Debug; use std::fmt::Display; use std::iter::FromIterator; +use std::mem::replace; use std::ops::Range; use std::sync::Arc; @@ -93,8 +94,17 @@ impl Substitutions { } /// Add all the provided substitutions to this one - pub fn extend(&mut self, substitutions: &Substitutions) { + /// Fails in case + pub fn extend(&mut self, substitutions: &Substitutions) -> Result<(), UnificationError> { + for (label, fmla) in &substitutions.0 { + if let Some(my_fmla) = self.0.get(label) { + if !my_fmla.eq(fmla) { + Err(UnificationError::UnificationFailed)?; + } + } + } self.0.extend(substitutions.0.clone()); + Ok(()) } /// Gets the formula the given label is to be substituted with. @@ -159,6 +169,19 @@ impl<'a> Debug for SubstitutionsRef<'a> { } } +impl<'a> Display for SubstitutionsRef<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut dm = f.debug_map(); + for (label, formula) in &self.substitutions.0 { + dm.entry( + &as_str(self.db.name_result().atom_name(*label)), + &format!("{}", &formula.as_ref(self.db)), + ); + } + dm.finish() + } +} + /// A parsed formula, in a tree format which is convenient to perform unifications #[derive(Clone, Default, Debug)] pub struct Formula { @@ -221,24 +244,52 @@ impl Formula { !self.tree.has_children(self.root) } + /// Returns the node obtained when following the given path. + #[inline] + fn node_by_path(&self, path: &[usize]) -> Option { + let mut node_id = self.root; + for index in path { + node_id = self.tree.nth_child(node_id, *index)?; + } + Some(node_id) + } + /// Returns the label obtained when following the given path. /// Each element of the path gives the index of the child to retrieve. /// For example, the empty #[must_use] pub fn get_by_path(&self, path: &[usize]) -> Option