123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287openBatteriesopenAsttype'aloc='aAsttypes.loctypefun_arg=Asttypes.arg_label*expressionoption*pattern(** represents a nanopass definition **)typenp_pass={npp_name:string;npp_loc:Location.t;npp_input:Lang.np_language(* source language *);npp_output:Lang.np_language(* target language *);npp_pre:expression->expression(* generates expressions to precede productions / entry *);npp_post:expression(* entry point expression *);npp_procs:np_processorlist(* proccessors *)}(** represents a processor definition (a transformation
between nonterminals in a nanopass) **)andnp_processor={npc_name:string;npc_loc:Location.t;npc_dom:Lang.np_nonterm(* domain nonterminal *);npc_cod:Lang.np_nontermoption(* co-domain nonterminal (or terminal, when [None]) *);npc_args:fun_arglist(* arguments to processor *);npc_clauses:clauselist(* processor clauses *);npc_clauses_loc:Location.t}andclause=np_pat*expression(** represents a pattern in a production. the pattern must be parsed
by nanocaml so that we can correctly map over lists and apply
catamorphims, e.g. for expressions like [(x, e [@r]) [@l]]. **)andnp_pat=(* TODO: [] and :: patterns *)|NPpat_anyofLocation.t(* _ *)|NPpat_varofstringloc(* x *)|NPpat_aliasofnp_pat*stringloc(* p as x *)|NPpat_tupleofnp_patlist*Location.t(* (p, ...) *)|NPpat_variantofstring*np_patoption*Location.t(* `X p *)|NPpat_mapofnp_pat(* list destructuring, e.g. (p [@l]) *)|NPpat_cataofnp_pat*expressionoption(* p [@r <optional explicit cata>] *)(** returns the [Location.t] of the given pattern. **)letrecloc_of_pat=function|NPpat_anyloc->loc|NPpat_var{loc}->loc|NPpat_alias(_,{loc})->loc|NPpat_tuple(_,loc)->loc|NPpat_variant(_,_,loc)->loc|NPpat_mapp->loc_of_patp|NPpat_cata(p,_)->loc_of_patp(** convert the RHS of a [let] into a [np_processor]. **)letrecprocessor_of_rhs~name~dom~cod~loce0=letrecget_argsacc=function|{pexp_desc=Pexp_fun(lbl,dflt,pat,body)}->letarg=lbl,dflt,patinget_args(arg::acc)body|{pexp_desc=Pexp_functioncases;pexp_loc=clauses_loc}->List.revacc,cases,clauses_loc|{pexp_loc=loc}->Location.raise_errorf~loc"processor must end in 'function' expression"inletclause_of_case{pc_lhs=p;pc_rhs=e;pc_guard=g}=matchgwith|Some{pexp_loc=loc}->Location.raise_errorf~loc"guards not allowed in nanopass clauses"|None->pat_of_patternp,einletargs,cases,clauses_loc=get_args[]e0inletclauses=List.mapclause_of_casecasesin{npc_name=name;npc_dom=dom;npc_cod=cod;npc_loc=loc;npc_args=args;npc_clauses=clauses;npc_clauses_loc=clauses_loc}(** convert a [pattern] into a [np_pat]. **)andpat_of_patternp=letbase_pat=matchp.ppat_descwith|Ppat_any->NPpat_anyp.ppat_loc|Ppat_varx->NPpat_varx|Ppat_alias(p,name)->NPpat_alias(pat_of_patternp,name)|Ppat_tupleps->NPpat_tuple(List.mappat_of_patternps,p.ppat_loc)|Ppat_variant(v,arg)->NPpat_variant(v,Option.mappat_of_patternarg,p.ppat_loc)|_->Location.raise_errorf~loc:p.ppat_loc"this kind of pattern not allowed in nanopass clause"inp.ppat_attributes|>List.fold_left(funpat(attr,payload)->let{txt;loc}:stringloc=attrinmatchtxt,payloadwith|"l",_->NPpat_mappat|"r",_->beginmatchpayloadwith|PStr[{pstr_desc=Pstr_eval(e,_)}]->NPpat_cata(pat,Somee)|PStr[]->NPpat_cata(pat,None)|_->Location.raise_errorf~loc"invalid argument to [@r] attribute"end|_->pat)base_patletsignature_arrow="=>"(** extract [L0] and [L1] out of expression of form [L0 --> L1].
returns [("L0", loc_L0), ("L1", loc_L1)] (for this particular example). **)letextract_pass_sig=function|{pexp_desc=Pexp_apply({pexp_desc=Pexp_ident{txt=Lidentarrow}},[Nolabel,{pexp_desc=Pexp_construct({txt=Lidentl0_name;loc=l0_loc},None)};Nolabel,{pexp_desc=Pexp_construct({txt=Lidentl1_name;loc=l1_loc},None)}])}whenarrow=signature_arrow->(l0_name,l0_loc),(l1_name,l1_loc)|{pexp_loc=loc}->Location.raise_errorf~loc"invalid language specification; expected 'LX %s LY'"signature_arrow(** extract domain and co-domain from the name of a production.
the rules are:
y_of_x => dom="x", cod="y"
x_to_y => dom="x", cod="y"
x => dom=cod="x"
x_f => dom="x", cof=None
if the co-domain is not a valid nonterm of the output language,
then the co-domain is None.
given the string name, returns [dom, opt_cod].
**)letextract_dom_cod~locl0l1name=letget_ntlangname=tryLang.language_nontermlangnamewithNot_found->Location.raise_errorf~loc"no such nonterminal %S in language %S"namelang.Lang.npl_nameinletget_nt_optlangname=trySome(Lang.language_nontermlangname)withNot_found->Nonein(* TODO: not split on '_'!!!!! instead just search for "of"/"to" *)matchString.split_on_char'_'namewith|[cod;"of";dom]->get_ntl0dom,get_nt_optl1cod|[dom;"to";cod]->get_ntl0dom,get_nt_optl1cod|[both]->get_ntl0both,get_nt_optl1both|dom::_->get_ntl0dom,None|_->Location.raise_errorf~loc"unable to infer processor input/output from processor's name"(** convert a [value_binding] into a [np_pass] *)letpass_of_value_binding=function|{pvb_pat={ppat_desc=Ppat_var{txt=name}};pvb_loc=loc;pvb_expr=e0;pvb_attributes=pass_attr::_}->(* parse language from [[@pass L0 --> L1]] *)letfind_lang~locl=Lang.find_languagel~exn:(Location.Error(Location.errorf~loc"language %S has not been defined"l))inletl0,l1=matchsndpass_attrwith|PStr[{pstr_desc=Pstr_eval(lang_expr,[])}]->let(l0_name,l0_loc),(l1_name,l1_loc)=extract_pass_siglang_exprinfind_langl0_locl0_name,find_langl1_locl1_name|_->Location.raise_errorf~loc:(fstpass_attr).loc"invalid [@pass] syntax"in(* convert expression [e] into [f, vbs, body], where
[vbs] are the value_bindings of the processors, [body]
is the final expression, and [f] is a function that inserts
its argument in place of the processors/body. *)letrecextract_definitionsf=function|{pexp_desc=Pexp_extension({txt="passes"},PStrstmts);pexp_loc=passes_loc}->letentry=refNoneinletextract_stmt_bindings=beginfunction|{pstr_desc=Pstr_value(Recursive,vbs)}->letset_entry_name=beginfunction|Ppat_var{txt=name}->entry:=Somename|_->()endinList.iter(funvb->ifList.exists(fun({Asttypes.txt},_)->txt="entry")vb.pvb_attributesthenset_entry_namevb.pvb_pat.ppat_desc)vbs;vbs|_->[]endinletvbs=List.fold_right(funbindingslst->extract_stmt_bindingsbindings@lst)stmts[]andbody=match!entrywith|None->failwith"[%passes ...] requires a designated [@entry] function"|Someid->{pexp_desc=Pexp_ident{txt=Lidentid;loc=passes_loc};pexp_loc=passes_loc;pexp_attributes=[]}inf,vbs,body|{pexp_desc=Pexp_fun(lbl,dflt,pat,body)}ase->extract_definitions(fune'->f{ewithpexp_desc=Pexp_fun(lbl,dflt,pat,e')})body|{pexp_desc=Pexp_letmodule(name,mod_expr,body)}ase->extract_definitions(fune'->f{ewithpexp_desc=Pexp_letmodule(name,mod_expr,e')})body|{pexp_desc=Pexp_letexception(exn,body)}ase->extract_definitions(fune'->f{ewithpexp_desc=Pexp_letexception(exn,e')})body|{pexp_desc=Pexp_let(recf,vbs,({pexp_desc=Pexp_let_}asbody))}ase|({pexp_desc=Pexp_let(recf,vbs,({pexp_desc=Pexp_extension_}asbody))}ase)->extract_definitions(fune'->f{ewithpexp_desc=Pexp_let(recf,vbs,e')})body|{pexp_desc=Pexp_let(Recursive,vbs,body)}->f,vbs,body|{pexp_loc=loc}->Location.raise_errorf~loc"let[@pass] must end in either a [%%passes ...] block or a recursive let, followed by a single expression"inletpre,bindings,post=extract_definitionsidentitye0in(* parse processors from bindings in final letrec *)letprocs=List.map(function|{pvb_pat={ppat_desc=Ppat_var{txt=name}};pvb_expr=proc_rhs;pvb_loc=loc;pvb_attributes=ats}->(* parse dom/cod names *)let(dom,cod)=extract_dom_cod~locl0l1nameinprocessor_of_rhs~name~loc~dom~codproc_rhs|{pvb_loc=loc}->Location.raise_errorf~loc"invalid processor definition")bindingsin{npp_name=name;npp_loc=loc;npp_input=l0;npp_output=l1;npp_pre=pre;npp_post=post;npp_procs=procs}|{pvb_loc=loc}->Location.raise_errorf~loc"invalid pass definition"