function rep(s, form, result) {
    form = read_str(s)
    result = _eval(form)
    _print(result)
    if(N > (N_at_last_gc + GC_EVERY)) {
        # for debugging garbage collector:
        # logg_dbg("rep", "auto-gc at N " N)
        # _dump_dot("pre-gc-" N ".dot")
        # _gc_dot(_cons(_GLOBALS,
        #         _cons(_MACROS,
        #               _cons(_COMPARED_SYMBOLS,
        #                     _nil()))),
        #         "auto-gc-" N "-marks.dot",
        #         "auto-gc-" N "-sweeps.dot")
        # _dump_dot("post-gc-" N ".dot")
if(PROMPT == 0) PROMPT = "user> "_prompt()
if(PROMPT == 0) PROMPT = "* "_forget_parse_state()# it appears that during BEGIN, if we are operating on a file# specified on the command line, FILENAME is not set yet. So we# have to find out whether any filename was specified, instead.if(length(ARGV) < 2) _prompt()
{ rep($0); _prompt() }
{
    # the reason to keep lists of parser state is so we can have forms
    # that span lines, rather than requiring each line of input to be
    # a complete form.
    tokenize($0)
    read_forms_into(_TO_EVAL)
    if(!FILENAME || (FILENAME == "-")) _print_eval_all()
    else _just_eval_all()
    if(length(_TOKENS) == 0 && length(_TO_EVAL) == 0) {
        # we have come to the end of all complete expressions in the
        # input so far. this is important so that we don't GC data
        # that we have read but not yet eval'd.
        _maybe_gc()
        _prompt()
    }
}

END {
    _incomplete_parse_at_end()
}
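# Hedged illustration (hypothetical session, not from this repo): with
# the parser state kept in _TOKENS across input lines, a form that
# spans lines is only eval'd once it is complete, e.g.
#
#   * (foo (bar 1
#   2))
#
# the first line only buffers tokens; the second closes the form, so
# read_forms_into() moves it into _TO_EVAL and it is eval'd/printed.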
function tokenize_into(input, ta, chars_left, tal, mp) {
function _forget_parse_state() {
    # logg_dbg("_forget_parse_state")
    delete _TOKENS
    delete _WHERE
    _INSIDE_STRING = 0
}

function _push_token(t, n) {
    # logg_dbg("_push_token", t)
    n = length(_TOKENS)+1
    _TOKENS[n] = t
    _WHERE[n] = FILENAME ":" FNR
}

function tokenize(input, chars_done, chars_left, top) {
    if(match(input, /^[ ]+/)) {
    } else if(match(input, /^~/)) {
        ta[++tal] = substr(input, 1, RLENGTH)
    } else if(match(input, /^,@/)) { # unquote-splicing
        ta[++tal] = substr(input, 1, RLENGTH)
    } else if(match(input, /^[\[\]{}()'`,!^@]/)) { # special single char
        ta[++tal] = substr(input, 1, RLENGTH)
    } else if(match(input, /^"(\\.|[^\\"])*"?/)) { # double-quoted string
        ta[++tal] = substr(input, 1, RLENGTH)
    } else if(match(input, /^;.*/)) { # comment
        ta[++tal] = substr(input, 1, RLENGTH)
    } else if(match(input, /^[^ \[\]{}('"`,;)@]+/)) { # non-special chars
        ta[++tal] = substr(input, 1, RLENGTH)
# logg_dbg("tokz", "input is [" input "]; inside string? " _INSIDE_STRING)top = length(_TOKENS)# warning: don't match() regexes on anything but input here.# we use RSTART and RLENGTH below, which match() sets. all# patterns must start with ^, so RSTART will always be 1.if(_INSIDE_STRING) {if(match(input, /^"/)) { # immediate string end_TOKENS[top] = _TOKENS[top] substr(input, 1, RLENGTH)_INSIDE_STRING = 0} else if(match(input, /^(\\[^"]|[^\\"])*[^\\]"/)) { # stuff"# logg_dbg("tokz", "leaving string")_TOKENS[top] = _TOKENS[top] substr(input, 1, RLENGTH)_INSIDE_STRING = 0} else { # no end quote in input. stay inside# newline inside string escaped?if(match(input, /^(\\.|[^\\])*\\$/)) {# keep the backslash off_TOKENS[top] = _TOKENS[top] substr(input, 1, RLENGTH-1)} else {_TOKENS[top] = _TOKENS[top] input "\n"# everything's OK, but we have not actually# matched any regexps. set RSTART and RLENGTH to# say we matched the whole line.RSTART = 1RLENGTH = length(input)+1}}
logg_err("tokz", "unrecognized input at char " \chars_done ": " input)exit 1
        if(match(input, /^[ ]+/));
        else if(match(input, /^~/))
            _push_token(substr(input, 1, RLENGTH))
        else if(match(input, /^,@/)) # unquote-splicing
            _push_token(substr(input, 1, RLENGTH))
        else if(match(input, /^[\[\]{}()'`,!^@]/)) # special single char
            _push_token(substr(input, 1, RLENGTH))
        else if(match(input, /^"(\\.|[^\\"])*"/)) { # double-quoted string
            # logg_dbg("tokz", "complete string")
            _push_token(substr(input, 1, RLENGTH))
        } else if(match(input, /^"(\\([^"]|$)|[^\\"])*/)) { # dq string no end
            # logg_dbg("tokz", "incomplete string")
            if(substr(input, RLENGTH, 1) == "\\")
                _push_token(substr(input, 1, RLENGTH-1))
            else
                _push_token(substr(input, 1, RLENGTH) "\n")
            _INSIDE_STRING = 1
            # logg_dbg("tokz", "_INSIDE_STRING")
        } else if(match(input, /^;.*/)) # comment
            _push_token(substr(input, 1, RLENGTH))
        else if(match(input, /^[^ \[\]{}('"`,;)@]+/)) # non-special chars
            _push_token(substr(input, 1, RLENGTH))
        else {
            if(!FILENAME || FILENAME == "-") {
                logg_err("tokz", "at char " chars_done ": " \
                         "unrecognized input: " input)
                chars_left = 0
                _forget_parse_state()
            } else {
                logg_err("tokz", " " FILENAME ":" FNR ": " \
                         "at char " chars_done ": " \
                         "unrecognized input: " input)
                exit(1)
            }
        }
logg_err("tokz", "at char " chars_done ", token not matched: " input)exit 1
        if(!FILENAME || FILENAME == "-") {
            logg_err("tokz", "at char " chars_done ": " \
                     "token not matched: " input)
            chars_left = 0
            _forget_parse_state()
        } else {
            logg_err("tokz", " " FILENAME ":" FNR ": " \
                     "at char " chars_done ": " \
                     "token not matched: " input)
            exit(1)
        }
function read_str(s, i, ta, tal) {
    delete ta[1] # make sure ta is an array for gawk -c
    tokenize_into(s, ta)
    tal = length(ta)
    i[1] = 1 # make i an array so we can pass it by reference
    return read_form(i, ta, tal, 0)
}

function read_form(i,ta,tal, quote) {
    if(ta[i[1]] == "'") {
        quote = "quote"
        i[1]++
    } else if(ta[i[1]] == "`") {
        quote = "quasiquote"
        i[1]++
    } else if(ta[i[1]] == ",") {
        quote = "unquote"
        i[1]++
    } else if(ta[i[1]] == ",@") {
        quote = "unquote-splicing"
        i[1]++
    } else {
        quote = 0
# before, with the mal-step-2-like reader, we knew the ending
# nesting_level would be 0. there would always be one form to
# evaluate. the question was whether it was an atom or a list; if a
# list, recursion would take us to the end of the form (and no
# farther: multiple forms on the same line would be ignored). if the
# nesting level at the end were not zero, it would be an error, on
# that line.
#
# now, with multiline forms, the form and the line are not constrained
# to the same extent.
#
# - there may not be a complete form to read yet. we want to avoid
#   starting to read anything until we know we have enough tokens to
#   finish reading it.
#
# - there could be multiple forms on one line, and so we may end up
#   reading more than one.
#
# - there could be a fractional form at the end. we want to consume
#   the full form(s) and leave the half forms.
#
# and that means that when we read a list, we need its starting _and
# ending_ indices.
function read_forms_into(rvs, a, b, i, t, quote, topp, list_found, nesting_level) {
    delete rvs[1]
    if(!a) {
        a = 1
        b = length(_TOKENS)
        # don't try to read an incomplete string
        if(_INSIDE_STRING) b--
        topp = 1 # we are the toplevel read_forms_into
    if(match(ta[i[1]], /^\(/)) {
        # logg_dbg("read_form", "( at token " i[1])
        i[1] += 1
        return read_list(i,ta,tal,quote)
    } else {
        return read_atom(i,ta,tal,quote)
logg_dbg("read_forms_into", "a " a " b " b " ingoing length(rvs) " length(rvs))while(a <= b) { # we may break out, too, belowt = _TOKENS[a]# logg_dbg("read_forms_into", "reading " a " = " t)logg_dbg("read_forms_into", "token " t " quote? " quote)if(t == "'") {quote = "quote"; a++; continue} else if(t == "`") {quote = "quasiquote"; a++; continue} else if(t == ",") {quote = "unquote"; a++; continue} else if(t == ",@") {quote = "unquote-splicing"; a++; continue}if(t == "(") {list_found = 0nesting_level = 1for(i=a+1; i<=b; i++) {if(_TOKENS[i] == "(") nesting_level++else if(_TOKENS[i] == ")") nesting_level--if(nesting_level==0) {# the list begun at a has ended at ilist_found = 1break}}if(list_found) {# logg_dbg("read_forms_into", "list from " a " to " i)rvs[length(rvs)+1] = read_list(a+1, i-1, quote)quote = 0# logg_dbg("read_forms_into", "done from " a " to " i)a = i+1} else {# logg_dbg("read_forms_into", "incomplete list, not reading")# the list must not be completely input yet. we can't# read it.return}} else if(t == ")") {# we already dealt with positive nesting levels; this is# negativelogg_err("read_forms_into", "too many )'s at " _WHERE[length(_WHERE)])_forget_parse_state()return} else {logg_dbg("read_forms_into", "atom at " a " token is " t " quote? " quote)rvs[length(rvs)+1] = read_atom(a, quote)quote = 0a++}}# logg_dbg("read_forms_into", "a " a " b " b " lrvs " length(rvs))if(topp) {# we have read all the _TOKENS. but! maybe we are _INSIDE_STRINGif(!_INSIDE_STRING) {# ok. we can forget everything._forget_parse_state()}
function read_list(i, ta, tal, quote, prevtail, head) {
# support older api, where we put one string in, and get one eval'd
# value out. there can be multiple expressions so we'll return the
# value of the last one. really this is a bit like _evprog but with an
# awk array. anyway this is the api lib-eval.awk uses.
function eval_read_str(s, l, i, rv) {
    tokenize(s)
    read_forms_into(_TO_EVAL)
    l = length(_TO_EVAL)
    for(i=1; i<=l; i++)
        rv = _eval(_TO_EVAL[i])
    delete _TO_EVAL
    return rv
}

function read_list(a, b, quote, i, forms, prevtail, head) {
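# Hedged usage sketch of that older api (hypothetical call, not taken
# from lib-eval.awk itself): several expressions go in as one string
# and the value of the last one comes back, e.g.
#
#   v = eval_read_str("(def! x 3) (+ x 4)")   # v holds the value of (+ x 4)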
    for(; (i[1]<=tal) && (ta[i[1]] !~ /^[).]/); i[1]++) {
        # logg_dbg("rd_l", "in loop, i is " i[1] "; token is " ta[i[1]])
        head = _cons(read_form(i, ta, tal), head)
    }
    # logg_dbg("read_list", "after loop, i[1] is " i[1] "; token is " ta[i[1]] "; head is " head)
    prevtail = head
    head = _nreverse(head)
    if(ta[i[1]] == ".") {
        i[1] += 1
        _set_cdr(prevtail, read_form(i, ta, tal))
        i[1] += 1
    } else if(ta[i[1]] ~ /^\)/) { # properly terminated
        # logg_dbg("read_list", "after _nreverse, head is " head)
    ## orientation: _TOKENS[a-1] == "(" and _TOKENS[b+1] == ")"
    if(((b-a+1) >= 3) && _TOKENS[b-1] == ".") {
        # the end of the list is dotted
        # logg_dbg("read_list", "dotted. reading forms " b-2 " and " b ".")
        read_forms_into(forms, a, b-2)
        read_forms_into(forms, b, b)
        head = _cons(forms[length(forms)-1], forms[length(forms)])
        delete forms[length(forms)]
        delete forms[length(forms)]
        delete forms[length(forms)]
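# Hedged illustration (hypothetical tokens, not from this repo) of the
# dotted case above: for the source text "(1 2 . 3)", _TOKENS[a..b]
# holds "1" "2" "." "3", so _TOKENS[b-1] is the dot; the two
# read_forms_into() calls read "1" "2" and then "3", and the _cons()
# above builds the dotted tail pair (2 . 3).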
function _smoke_test_reader() {
    x = read_str("(foo \"bar\" baz 3.14159 (sublist 1 2 3))", ta)
    logg_inf("_smoke_test_reader", "final result: " x ", being " _repr(x))
function _incomplete_parse_at_end( i, l, nesting_level, unclosed) {
    delete unclosed
    l = length(_TOKENS)
    if(l) {
        if(_INSIDE_STRING) {
            logg_err("_incomplete_parse_at_end",
                     "still inside string begun at " _WHERE[l])
        }
        for(i=1; i<=l; i++) {
            if(_TOKENS[i] == "(") {
                unclosed[length(unclosed)+1] = i
            } else if(_TOKENS[i] == ")") {
                delete unclosed[length(unclosed)]
            }
        }
        if(length(unclosed) > 0) {
            for(i=length(unclosed); i>=1; i--) {
                logg_err("_incomplete_parse_at_end",
                         "still inside list (" _TOKENS[unclosed[i]+1] \
                         "... begun at " _WHERE[unclosed[i]])
            }
        }
    }
}
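# Hedged illustration (hypothetical input, not from this repo): if a
# file ends with the line "(foo (bar", END runs this with two entries
# left in unclosed, so two "still inside list" errors are reported,
# innermost first: "(bar..." and then "(foo...".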
function _maybe_gc() {
    if(N > (N_at_last_gc + GC_EVERY)) {
        # for debugging garbage collector:
        # logg_dbg("rep", "auto-gc at N " N)
        # _dump_dot("pre-gc-" N ".dot")
        # _gc_dot(_cons(_GLOBALS,
        #         _cons(_MACROS,
        #               _cons(_COMPARED_SYMBOLS,
        #                     _nil()))),
        #         "auto-gc-" N "-marks.dot",
        #         "auto-gc-" N "-sweeps.dot")
        # _dump_dot("post-gc-" N ".dot")
        _gc(_cons(_GLOBALS,
            _cons(_MACROS,
                  _cons(_COMPARED_SYMBOLS,
                        _nil()))))
        N_at_last_gc = N
    }
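# Hedged note (assumption, not stated in this fragment): N looks like a
# running allocation count, so with, say, GC_EVERY = 1000 and
# N_at_last_gc = 4000, the collector runs once N exceeds 5000 and the
# new N is recorded in N_at_last_gc.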