///
/// The corresponding term number, its ident (name), whether it's right-associative, and the
/// literal items/symbols used in the declaration. For general notation declarations that
/// start with a constant (non-variable), that leading constant is left out of `lits` and
/// instead becomes one of the keys in `prefixes`. When the math parser finds the initial
/// token, it looks up the term using the `prefixes` mapping, and then uses the remaining
/// constants as checks that the item is syntactically correct; by that point it has already
/// parsed and moved past the initial constant token.
///
/// Examples of lits:
///```text
/// NotationInfo {
///     term_num: 2,
///     term_ident: an,
///     rassoc: false,
///     lits: [Var { pos: 0, prec: Num(34) }, Const(/\), Var { pos: 1, prec: Num(35) }]
/// }
///
/// NotationInfo {
///     term_num: 1,
///     term_ident: not,
///     rassoc: true,
///     lits: [Var { pos: 0, prec: Num(41) }]
/// }
///
/// // Notice the missing `Const([)` at the front; it's a key in the `prefixes` map instead.
/// NotationInfo {
///     term_num: 14,
///     term_ident: sb,
///     rassoc: true,
///     lits: [Var { pos: 0, prec: Num(1) }, Const(/), Var { pos: 1, prec: Num(1) }, Const(]), Var { pos: 2, prec: Num(42) }]
/// }
///```
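///
/// For orientation, a plausible shape for these two types, with field names taken from the
/// examples above (the concrete field types are assumptions, not necessarily the verifier's
/// actual definitions):
///```text
/// pub struct NotationInfo<'a> {
///     pub term_num: u32,
///     pub term_ident: Str<'a>,
///     pub rassoc: bool,
///     pub lits: Vec<NotationLit<'a>>,
/// }
///
/// pub enum NotationLit<'a> {
///     Const(Str<'a>),
///     Var { pos: usize, prec: Prec },
/// }
///```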
/// The persistent mm0 state; it remains between the checking of each mm0 declaration.
///
/// The most complex part of this is the notation system. Users can declare four kinds of notation:
/// prefix, infixl, infixr, and general. The verifier tracks these in only two maps, putting
/// prefix and general notations in the `prefixes` map, and infixl/infixr declarations in the `infixes` map.
/// They're grouped this way because the math parser looks for these two things separately: the places
/// where a prefix item might be used are also the places where a general notation can appear, and the
/// same is true for both flavors of infix.
///
/// There are four maps holding notation information.
/// 1. `consts`: maps the item's name to its associated precedence.
/// 2. `occupied_precs`: prevents parser ambiguities; see the doc comment above the field for more info.
/// 3. `prefixes`: maps the item's name to its `NotationInfo { term_num, term_ident, rassoc, lits }`
/// 4. `infixes`: maps the item's name to its `NotationInfo { term_num, term_ident, rassoc, lits }`
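///
/// Sketched together (the struct name and element types here are guesses for illustration;
/// only the four field names come from the list above):
///```text
/// pub struct MmzState<'a> {
///     /// 1. notation token |-> its precedence
///     pub consts: HashMap<Str<'a>, Prec>,
///     /// 2. precedence level |-> the associativity occupying that level
///     pub occupied_precs: HashMap<Prec, Assoc>,
///     /// 3. leading token of a prefix/general notation |-> its NotationInfo
///     pub prefixes: HashMap<Str<'a>, NotationInfo<'a>>,
///     /// 4. token of an infixl/infixr notation |-> its NotationInfo
///     pub infixes: HashMap<Str<'a>, NotationInfo<'a>>,
/// }
///```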
/// Maps the string representing a user-declared notation to its associated precedence value. For example:
///```text
/// e.g. from peano.mm0's `an` (conjunction) declaration:
///
/// /\ |-> `Prec::Num(34)`
///```
/// A mapping of precedence levels to (prefix \/ infixl \/ infixr).
/// This is necessary to prevent ambiguities in the math parser; while the math parser
/// won't completely break without this, "You will get _a_ parse, but part of the idea here
/// is that you will also get _the_ parse", where _the_ parse is the one that conforms to
/// the context-free grammar laid out [here](https://github.com/digama0/mm0/blob/master/mm0.md#secondary-parsing).
///
/// If the environment permits both an infixl and an infixr at the same precedence level, for example:
///
///```text
/// infixr im: $->$ prec 25;
/// infixl an: $/\$ prec 25;
///```
///
/// Then beginning from state `Start`, and using rules 0, 1, and 2 taken from the mm0 expression grammar, we can derive
/// both `(a -> (b /\ c))` and `((a -> b) /\ c)` as parses of `$ a -> b /\ c $`:
///```text
/// Start := $ expr(0) $;
/// rule0 := expression(p1) -> expression(p2) (if p1 < p2)
/// rule1 := expression(p) -> expression(p) OP expression(p+1) (if OP is infixl prec p)
/// rule2 := expression(p) -> expression(p+1) OP expression(p) (if OP is infixr prec p)
///
/// with both:
/// infixr im: $->$ prec 25;
/// infixl an: $/\$ prec 25;
///
/// To get ((a -> b) /\ c):
/// ($ a -> b /\ c $, 0) ~rule0~> ($ a -> b /\ c $, 25)
/// ($ a -> b /\ c $, 25) ~rule1~> ($ a -> b $, 25) /\ ($ c $, 26)
/// ($ a -> b $, 25) ~rule2~> ($ a $, 26) -> ($ b $, 25)
///
/// To get (a -> (b /\ c)):
/// ($ a -> b /\ c $, 0) ~rule0~> ($ a -> b /\ c $, 25)
/// ($ a -> b /\ c $, 25) ~rule2~> ($ a $, 26) -> ($ b /\ c $, 25)
/// ($ b /\ c $, 25) ~rule1~> ($ b $, 25) /\ ($ c $, 26)
///```
///
/// By disallowing `->` (infixr) and `/\` (infixl) from inhabiting the same precedence level,
/// this ambiguity is avoided.
///
/// `peano.mm1` and the mm1 files based on it adopt a convention of using even precedence numbers for
/// left-associative infix operators, and odd precedence numbers for right-associative infix operators
/// as a simple way to avoid this kind of collision.
/// How each kind of declaration occupies this map:
/// infixl notations occupy their precedence as left-associative,
/// infixr notations occupy theirs as right-associative,
/// prefix notations with arity > 0 and general notations ending with a variable occupy theirs as right-associative,
/// and 0-ary prefix notations and general notations ending with a constant don't affect the map.
pub occupied_precs: HashMap<Prec, Assoc>,
/// Record that `prec` is occupied with associativity `assoc`; error if it was already
/// occupied with a different associativity.
pub fn occupy_prec(&mut self, term_ident: Str<'a>, prec: Prec, assoc: Assoc) -> Res<()> {
    if let Some(already) = self.occupied_precs.insert(prec, assoc) {
        if already != assoc {
            return Err(VerifErr::Msg(format!("{:?}; Cannot re-occupy prec {:?} with different associativity", term_ident, prec)))
        }
    }
    Ok(())
}
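A self-contained sketch of the same check (plain `u32` precedences and a two-variant `Assoc` stand-in; the verifier's own `Prec` and `Assoc` types may differ), also showing how the even/odd convention mentioned above keeps infixl and infixr from colliding:

```rust
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq)]
enum Assoc { Left, Right }

// Sketch of `occupy_prec`: record the associativity at `prec`, rejecting a conflicting claim.
fn occupy_prec(map: &mut HashMap<u32, Assoc>, prec: u32, assoc: Assoc) -> Result<(), String> {
    if let Some(already) = map.insert(prec, assoc) {
        if already != assoc {
            return Err(format!("Cannot re-occupy prec {prec} with different associativity"));
        }
    }
    Ok(())
}

fn main() {
    let mut occupied = HashMap::new();
    // infixr im: $->$ prec 25;
    assert!(occupy_prec(&mut occupied, 25, Assoc::Right).is_ok());
    // infixl an: $/\$ prec 25;  (same level, other associativity: rejected)
    assert!(occupy_prec(&mut occupied, 25, Assoc::Left).is_err());
    // infixl an: $/\$ prec 34;  (an even level for a left-associative operator: fine)
    assert!(occupy_prec(&mut occupied, 34, Assoc::Left).is_ok());
}
```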
/// For initial calls to `expr`; expects the parser to have something
/// surrounded by `$` delimiters.
/// The main entry point to the math parser. When the mmz parser wants
/// to parse an expression from a new math string (`$ .. $`), it calls this.
/// For recursive calls to `expr`; doesn't demand a leading `$` token
fn expr(&mut self, p: Prec) -> Res<MmzExpr<'b>> {
    let lhs_e = self.prefix(p)?;
    self.lhs(p, lhs_e)
}
/// Takes a precedence and an optional accumulating expression, and tries to continue parsing
/// the rest of the math string held by `self` (which holds the mmz file, and by extension
/// the math string we're working on).
fn expr(&mut self, initial_prec: Prec, accum: Option<MmzExpr<'b>>) -> Res<MmzExpr<'b>> {
    match accum {
        // This was an initial call to `expr`; there's no accumulating expression yet.
        None => {
            let prefix = self.prefix(initial_prec)?;
            self.expr(initial_prec, Some(prefix))
        },
        // This is a continuation call, adding to some accumulating expression.
        // These calls can only come from calls to `expr` made from within the module.
        Some(a) => match self.take_ge_infix(initial_prec).transpose()? {
            // We've reached the end of the current expression, either because the
            // math string is over, or because initial_prec > next_prec.
            None => Ok(a),
            // Continue adding to this accumulator.
            // We have: (prec_low, a) .. $ `op2:prec_high` b .. $
            Some((op_term, op_prec)) => {
                let b = self.prefix(op_prec)?;
                let a_until_lt = self.lhs(a, (op_term, op_prec), b)?;
                self.expr(initial_prec, Some(a_until_lt))
            }
        }
    }
}
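The recursion above is a precedence-climbing (Pratt) parser. Below is a minimal standalone sketch of the same control flow, with single-character atoms and a hard-coded infix table; the names (`Parser`, `prefix`, `expr`) echo the methods above, but nothing here is the verifier's actual implementation.

```rust
use std::collections::HashMap;

#[derive(Debug)]
enum Expr {
    Atom(char),
    App(char, Box<Expr>, Box<Expr>),
}

struct Parser<'a> {
    toks: std::iter::Peekable<std::str::Chars<'a>>,
    // operator |-> (precedence, is_right_assoc)
    infixes: HashMap<char, (u32, bool)>,
}

impl<'a> Parser<'a> {
    // Parse a leading atom (stands in for `prefix`, which also handles prefix/general notations).
    fn prefix(&mut self) -> Expr {
        Expr::Atom(self.toks.next().expect("expected an atom"))
    }

    // Parse an expression at precedence `p`: read a prefix, then keep consuming infix
    // operators whose precedence is >= `p` (the role played by `lhs`/`take_ge_infix` above).
    fn expr(&mut self, p: u32) -> Expr {
        let mut lhs = self.prefix();
        while let Some(op) = self.toks.peek().copied() {
            let Some(&(op_prec, rassoc)) = self.infixes.get(&op) else { break };
            if op_prec < p {
                break;
            }
            self.toks.next();
            // infixr parses its right operand at the same precedence, infixl at prec + 1,
            // mirroring rule1/rule2 in the grammar sketch above.
            let rhs = self.expr(if rassoc { op_prec } else { op_prec + 1 });
            lhs = Expr::App(op, Box::new(lhs), Box::new(rhs));
        }
        lhs
    }
}

fn main() {
    // '>' plays the part of `->` (infixr, prec 25); '&' plays `/\` (infixl, prec 34).
    let infixes = HashMap::from([('>', (25, true)), ('&', (34, false))]);
    let mut p = Parser { toks: "a>b&c".chars().peekable(), infixes };
    // Prints App('>', Atom('a'), App('&', Atom('b'), Atom('c'))), i.e. (a -> (b /\ c)).
    println!("{:?}", p.expr(0));
}
```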
fn literals(
/// Having previously parsed a prefix or general notation and determined its
/// corresponding `term`, parse and check the arguments demanded by that `term`
/// and then apply them, returning the `App t e*` expression.
fn apply_notation(
) -> Res<BumpVec<'b, MmzExpr<'b>>> {
) -> Res<MmzExpr<'b>> {
// A vec of `[None, .., None]` for as many `NotationLit::Var {..}` are demanded by the term's signature.
// The option stuff is a safe workaround for the fact that they may occur out of order relative to the
// underlying term's signature.
let e = self.expr(*prec)?;
let nth_arg = none_err!(term.args().nth(*pos))?;
let tgt_sort = nth_arg.sort();
let coerced = self.coerce(e, tgt_sort)?;
let sig_e = none_err!(term.args().nth(*pos))?;
let e = self.expr(*prec, None)?;
let coerced = self.coerce(e, sig_e.sort())?;
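The `[None, .., None]` slot technique described in the comment above, shown as a standalone sketch. The function name, signature, and string payloads are made up for illustration; only the idea of filling signature-position slots out of order and then demanding that every slot was filled comes from the code.

```rust
fn collect_args(sig_arity: usize, parsed: &[(usize, &str)]) -> Result<Vec<String>, String> {
    // One empty slot per variable in the term's signature.
    let mut slots: Vec<Option<String>> = vec![None; sig_arity];
    for &(pos, expr) in parsed {
        // `pos` is the variable's position in the term's signature, which need not match
        // the order in which the notation's literals were parsed.
        *slots.get_mut(pos).ok_or("position out of range")? = Some(expr.to_string());
    }
    // Every slot must have been filled for the application to be well-formed.
    slots
        .into_iter()
        .map(|slot| slot.ok_or_else(|| "a signature argument was never parsed".to_string()))
        .collect()
}

fn main() {
    // The literals visited signature positions 1, 0, 2 in that order; the result is
    // re-assembled in signature order.
    let parsed = [(1usize, "x"), (0, "a"), (2, "phi")];
    assert_eq!(collect_args(3, &parsed).unwrap(), ["a", "x", "phi"]);
}
```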
/// If the next infix token's precedence is strictly greater than (not merely
/// equal to) the last precedence, return (notation token, Prec, notation info)
fn peek_stronger_infix(&mut self, last_prec: Prec) -> Option<(Str<'a>, Prec, NotationInfo<'a>)> {
/// If the next infix token's precedence is greater than or
/// equal to the last precedence, return (token, Prec, notation info)
fn peek_ge_infix(&mut self, last_prec: Prec) -> Option<(Str<'a>, Prec, NotationInfo<'a>)> {
/// `lhs` and `lhs2` are mutually recursive functions that form the part of the Pratt parser
/// that deals with the presence of infix tokens.
fn lhs(&mut self, p: Prec, lhs_e: MmzExpr<'b>) -> Res<MmzExpr<'b>> {
    if let Some((ge_infix_term, ge_infix_prec)) = self.take_ge_infix(p).transpose()? {
        let rhs_e = self.prefix(ge_infix_prec)?;
        let new_lhs_e = self.lhs2(lhs_e, (ge_infix_term, ge_infix_prec), rhs_e)?;
        self.lhs(p, new_lhs_e)
    } else {
        Ok(lhs_e)
    }
}
fn lhs2(
// From `(a, op0, b)`, either continue adding to the right-hand side if there's a stronger operator next,
// or finish and return `App { op0 a b }`
fn lhs(
if let Some((_, next_prec, _)) = self.peek_stronger_infix(infix_prec) {
let new_rhs_e = self.lhs(next_prec, rhs_e)?;
self.lhs2(lhs_e, (infix_term, infix_prec), new_rhs_e)
// If the next token in the math-string is an infix, such that op0_prec <= op1_prec,
// recurse with lhs(a, op0, (b op1 c*..))
if let Some((op1_term, op1_prec)) = self.take_ge_infix(op0_prec).transpose()? {
let c = self.prefix(op1_prec)?;
let b_op_cprime = self.lhs(b, (op1_term, op1_prec), c)?;
self.lhs(a, (op0_term, op0_prec), b_op_cprime)
let mut args = infix_term.args();
if let (Some(fst), Some(snd), Some(_), None) = (args.next(), args.next(), args.next(), args.next()) {
let end_args = bumpalo::vec![
in self.bump;
self.coerce(lhs_e, fst.sort())?,
self.coerce(rhs_e, snd.sort())?
];
// Next item is None or a weaker infix, so return `App { op0 a b }`
let mut plus_args = op0_term.args();
if let (Some(fst), Some(snd), Some(_), None) = (plus_args.next(), plus_args.next(), plus_args.next(), plus_args.next()) {
let a_coerced = self.coerce(a, fst.sort())?;
let b_coerced = self.coerce(b, snd.sort())?;
term_num: infix_term.term_num,
num_args: infix_term.num_args_no_ret(),
args: self.alloc(end_args),
sort: infix_term.sort()
term_num: op0_term.term_num,
num_args: op0_term.num_args_no_ret(),
args: self.alloc(bumpalo::vec![in self.bump; a_coerced, b_coerced]),
sort: op0_term.sort()
## 0.1.2 -> 0.1.3
+ Add `occupied_precs` map for math-parser disambiguation.
+ Refactor `lhs` handling in the math parser.