#* Simple implementation of parsing expression grammar.
Intended to replace Treetop as the parser for Brat
but may be used for other purposes, as well.
*#
include 'scanner'
include 'terminal'
result_match = object.new
result_match.prototype.to_s = {
name = my.label || my.rule_name || "result"
"<#{name} text='#{my.text}' start=#{my.start_pos} end=#{my.end_pos}>"
}
peg = object.new
peg.init = { gramma = null |
my.named_rules = [:]
my.first = null
my.rule_id = 0
my.memo = []
gramma #Actually set up parser
}
memo_rule = { x, rule, memo |
m = memo[x.pos]
null? m
{
current = x.pos
res = rule x
true? res
{
memo[current] = [res, x.pos]
}
{
memo[current] = false
}
res
}
{
true? m
{
x.pos = m[1]
m[0]
}
}
}
peg.prototype.make_rule = { rule |
rule_memo = [:]
memo << rule_memo
{ x | memo_rule x, ->rule, rule_memo }
}
peg.make_result = { match_pos, end_pos, matched, rule_name = null |
result = result_match.new
result.start_pos = match_pos
result.end_pos = end_pos
result.matched = matched
result.label = null
result.labels = null
result.rule_name = rule_name
true? matched.string?
{ result.text = matched }
{
true? matched.has_method?(:full_match)
{ result.text = matched.full_match }
{
true? matched.array?
{ result.text = { matched.map(:text).join } }
{ true? matched.has_method?(:text)
{ result.text = matched->text }
{ result.text = "" }
}
}
}
true? matched.array?
{ result.elements = matched }
{ result.elements = [matched] }
result
}
#Parse a string
#If no start rule is given, then whichever rule was defined first will be used.
#By default, the entire string does not have to match.
peg.prototype.parse = { str, start_rule = null, fully = false |
null? start_rule
{ start_rule = my.first }
my.memo.each { v | v.clear }
s = scanner.new str
rule = my.named_rules[start_rule]
null? { ->rule } { throw "No such rule: #{start_rule}" }
new_lines = str.find_all("\n")
s.new_lines = new_lines
protect {
result = rule s
true? result
{
result.matched_all? = s.pos == str.length
assign_lines result, new_lines
}
{
x = parse_error("Failed to parse")
x(s)
}
true? { result && { fully } }
{ true? result.matched_all?, { result }, { x = parse_error("Failed to parse");x(s) }}
{ result }
}
from: "parse error"
rescue: { err |
p err
false
}
}
peg.assign_lines = { result, new_lines |
true? result.has_method?(:elements)
{
result.start_line = peg.find_line(result.start_pos, new_lines)
result.end_line = peg.find_line(result.end_pos, new_lines)
result.elements.each { r | peg.assign_lines(r, new_lines) }
}
}
peg.find_line = { pos, new_lines |
# This is basically wikipedia binary search
# Except we don't care about getting an exact match
left = 0
right = new_lines.length - 1
current = 0
unmatched = true
true? { right == -1 }
{ current = 0; unmatched = false }
true? { unmatched && { pos > new_lines[right] }}
{ current = right + 1; unmatched = false }
while { unmatched && { left <= right }}
{
current = ((left + right) / 2).to_i
when { new_lines[current] < pos } { left = current + 1 }
{ new_lines[current] > pos } { right = current - 1 }
{ true } { unmatched = false } # end it!
}
current + 1
}
peg.find_pos_info = { pos, new_lines |
pos_info = object.new
pos_info.line = find_line(pos, new_lines)
true? pos_info.line > 1
{ pos_info.line_start = new_lines[pos_info.line - 2] }
{ pos_info.line_start = 0 }
true? pos_info.line < new_lines.length
{ pos_info.line_end = new_lines[pos_info.line - 1] }
{ pos_info.line_end = pos + 10 }
pos_info.pos = pos
pos_info.line_pos = pos - pos_info.line_start
pos_info
}
seq_matcher = { x, rules |
start = x.pos
matches = rules.map_while { rule |
rule x
}
false? matches.length == rules.length
{ x.pos = start; false }
{ peg.make_result start, x.pos, matches }
}
#Match a sequence of rules
peg.prototype.seq = { rule, *rules |
rules = true? rules.empty?
{ true? function?(->rule) { [->rule] } { rule } }
{ [->rule] + rules }
make_rule { x | seq_matcher x, rules }
}
peg.prototype.seq_ref = { *rules |
m = my
seq rules.map { name | m.ref name }
}
any_matcher = { x, rules |
rules.each_until { opt |
opt x
}
}
#Match any of a set of rules
peg.prototype.any = { rule, *rules |
rules = true? rules.empty?
{ true? function?(->rule) { [->rule] } { rule } }
{ [->rule] + rules }
make_rule { x | any_matcher x, rules }
}
peg.prototype.any_ref = { *rule_names |
m = my
any rule_names.map { name | m.ref name }
}
#Match a rule the specified number of times
peg.prototype.num_of = { rule, min, max |
make_rule { x |
matches = []
start = x.pos
while {
matched = rule x
true? matched
{ matches << matched }
}
true? num_matches >= min
{ null? max
{ peg.make_result start, x.pos, matches }
{ true? num_matches <= max
{ peg.make_result start, x.pos, matches }
}
}
}
}
maybe_matcher = { x, rule |
start = x.pos
matched = rule x
true? matched
{
res = peg.make_result start, x.pos, matched
res.rule_name = "#{matched.rule_name || 'result'}?"
res
}
{
peg.make_result start, start, ""
}
}
#Match zero or one occurrences of rule
peg.prototype.maybe = { rule |
make_rule { x | maybe_matcher x, ->rule }
}
kleene_matcher = { x, rule |
matches = []
start = x.pos
while {
res = rule x
true? res
{ matches << res }
}
res = peg.make_result start, x.pos, matches
false? matches.empty?
{
rule_name = matches.first.rule_name
res.rule_name = "#{rule_name || 'result'}*"
}
res
}
#Match zero or more occurrences of rule
peg.prototype.kleene = { rule |
make_rule { x | kleene_matcher x, ->rule }
}
many_matcher = { x, rule |
start = x.pos
matches = []
matched = rule x
true? matched
{ matches << matched }
while {
wmatched = rule x
true? wmatched
{ matches << wmatched }
}
false? matches.empty?
{
rule_name = matches.first.rule_name
res = peg.make_result start, x.pos, matches
res.rule_name = "#{rule_name}+"
res
}
}
#Match one or more occurrences of rule
peg.prototype.many = { rule |
make_rule { x | many_matcher x, ->rule }
}
str_matcher = { x, literal |
start = x.pos
matched = x.scan_string literal
true? matched
{ peg.make_result start, x.pos, matched }
}
#Match a string literally
peg.prototype.str = { literal |
make_rule { x | str_matcher x, literal }
}
regex_matcher = { x, reg_literal |
start = x.pos
matched = x.scan_regex reg_literal
true? matched
{ peg.make_result start, x.pos, matched }
}
#Match a regular expression
peg.prototype.reg = { reg_literal |
make_rule { x | regex_matcher x, reg_literal }
}
no_matcher = { x, rule |
start = x.pos
res = rule x
stop = x.pos
x.pos = start
true? res
{ false }
{ peg.make_result start, stop, res }
}
#Specify that a rule should NOT match
peg.prototype.no = { rule |
make_rule { x | no_matcher x, ->rule }
}
and_matcher = { x, rule |
start = x.pos
matched = rule x
stop = x.pos
x.pos = start
true? matched
{ peg.make_result start, stop, "" }
}
#Specific that a rule should match, but do not advance
#position (lookahead)
peg.prototype.and = { rule |
make_rule { x | and_matcher x, ->rule }
}
set_namer = { x, rule, name |
res = rule x
true? res
{
res.rule_name = name
}
res
}
#Set a named rule
peg.prototype.set = { name, rule |
null? my.first
{ my.first = name }
r = make_rule { x | set_namer x, ->rule, name }
my.named_rules[name] = ->r
}
anything_matcher = { x |
true? x.end?
{ false }
{
cur = x.pos
res = x.str[x.pos]
x.pos = x.pos + 1
peg.make_result cur, x.pos, res
}
}
#Matches one of anything.
peg.prototype.anything = {
make_rule ->anything_matcher
}
ref_matcher = { x, rules, name |
r = rules[name]
null? { ->r }
{ throw "No such rule: #{name}" }
r x
}
#Reference a named rule
peg.prototype.ref = { name |
rules = my.named_rules
make_rule { x | ref_matcher x, rules, name }
}
peg.fetch_labels = { rule |
labels = []
true? rule.label
{ labels << rule }
true? rule.labels
{ labels.concat rule.labels }
{
other_rules = rule.elements.flat_map({ r |
true? r.has_method?(:label)
{
peg.fetch_labels(r)
}
{ [] }
})
rule.labels = other_rules
labels.concat other_rules
}
labels
}
action_matcher = { x, rule, block |
res = rule x
true? res
{
peg.add_labels res
res.with_this ->block
res
}
}
peg.prototype.parse_error = { msg |
{ x |
pos_info = peg.find_pos_info(x.pos, x.new_lines)
snippet = true? pos_info.line == 1
{ x.str[0, pos_info.line_end].chomp }
{ x.str[pos_info.line_start + 1, pos_info.line_end].chomp }
line_highlight = true? terminal.tty?
{ "\27[33m#{'-' * (pos_info.line_pos - 1)}^\27[0m" }
{ "#{'-' * (pos_info.line_pos - 1)}^" }
message = "\rBrat parser:\n\tA syntax error was encountered while parsing the code.\n\t#{msg} on line #{pos_info.line}:\n\n\t#{snippet}\n\t#{line_highlight}"
throw exception.new(message, :'parse error')
}
}
#Execute action upon match
peg.prototype.action = { rule, block |
make_rule { x | action_matcher x, ->rule, ->block }
}
peg.add_labels = { res |
labeled_rules = peg.fetch_labels res
labeled_rules.each { result |
false? res.has_method?(result.label)
{ res.add_method result.label, { result } }
}
}
peg.prototype.label = { label, rule |
make_rule { x |
res = rule x
true? res
{ res.label = label; res }
}
}
export peg, "peg"