123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254openBatteriesopenAst(** a type recognized by nanopass; usually a part of a production.
e.g. [string], [stmt], [(string * expr) list] **)typenp_type=|NP_termofcore_type(** external types are "terminals" **)|NP_nontermofstring(** named nonterminal **)|NP_tupleofnp_typelist(** [t * u * ...] **)|NP_listofnp_type(** [t list] **)(** a production is one of the forms in a nonterminal -- essentially
just a variant, e.g. [`Var], [`App]. **)typenp_production={nppr_name:string;nppr_arg:np_typeoption}(** a nonterminal is a type defined by a nanopass language, e.g.
[expr], [stmt]. **)typenp_nonterm={npnt_loc:Location.t;npnt_name:string;npnt_productions:np_productionlist}(** a nanopass language, e.g. L0, L1 (as traditionally named) **)typenp_language={npl_loc:Location.t;npl_name:string;npl_nonterms:np_nontermlist(* TODO: hash tbl? *)}letrecstring_of_type=function|NP_termcore_type->beginmatchcore_type.ptyp_descwith|Ptyp_constr({txt=l},_)->"<core> ("^List.last(Longident.flattenl)^")"|_->"<core>"end|NP_nonterms->s|NP_tuplet->"("^String.concat","(List.mapstring_of_typet)^")"|NP_listt->"["^string_of_typet^"]"(** global table of all defined languages. **)(* TODO: nv wants to make this into a real database,
which would allow caching, cross-file nanopass, etc. *)letlanguages:(string,np_language)Hashtbl.t=Hashtbl.create30(** globally registers the given language. raises
[Location.Error] if a language with the same
name is already defined. **)letadd_languagelang=ifHashtbl.memlanguageslang.npl_namethenLocation.raise_errorf~loc:lang.npl_loc"language %S defined already"lang.npl_nameelseHashtbl.addlanguageslang.npl_namelang(** returns the language with the given name. raises
[Not_found] if no such language has been defined. **)letfind_language?(exn=Not_found)name=Option.get_exn(Hashtbl.find_optionlanguagesname)exn(** [language_nonterm l name] returns the nonterminal
in language [l] with the given name. raises [Not_found]
if no such nonterminal. *)letlanguage_nonterm?(exn=Not_found)langname=List.find_exn(funnt->nt.npnt_name=name)exnlang.npl_nonterms(** convert [core_type] into nanopass type. **)lettype_of_core_type~nt_namest=letreccvtptyp=matchptyp.ptyp_descwith(* nonterminal: *)|Ptyp_constr({txt=Longident.Lidentname},[])whenList.memnament_names->NP_nontermname(* tuple: *)|Ptyp_tupletyps->letnpts=List.mapcvttypsinNP_tuplenpts(* list: *)|Ptyp_constr({txt=Longident.Lident"list"},[elem])->NP_list(cvtelem)(* otherwise, it's a terminal: *)|_->NP_termptypincvtt(** convert [row_field] (from polymorphic variant) into nanopass production **)letproduction_of_row_field~nt_names=function|Rtag(name,_,_,args)->{nppr_name=name;nppr_arg=matchargswith|[t]->Some(type_of_core_type~nt_namest)|_->None}|Rinherit{ptyp_loc=loc}->Location.raise_errorf~loc"invalid nanopass production form"(** convert [type_declaration] into nanopass nonterminal **)letnonterm_of_type_decl?extending~nt_names=function(* type nt = [ `A | `B ... ] *)|{ptype_name={txt=name};ptype_loc=loc;ptype_params=[];ptype_kind=Ptype_abstract;ptype_manifest=Some{ptyp_desc=Ptyp_variant(rows,Closed,_)}}->letprods=List.map(production_of_row_field~nt_names)rowsin{npnt_loc=loc;npnt_name=name;npnt_productions=prods}(* type nt = { add : [ `A ... ] ; del : [ `B ... ] } *)|{ptype_name={txt=name};ptype_loc=loc;ptype_params=[];ptype_kind=Ptype_recorddecls}->letlang=Option.get_exnextending(Location.Error(Location.errorf~loc"must be extending a language to use this form"))inletold_nontem=language_nontermlangname~exn:(Location.Error(Location.errorf~loc"no such nonterminal %S in language %S"namelang.npl_name))in(* get the 'lname' label out of the record, and parse
the productions contained in the type. *)letget_prodslname=matchList.find_opt(fun{pld_name={txt=x}}->x=lname)declswith|None->None|Some{pld_type={ptyp_desc=Ptyp_variant(rows,Closed,_)}}->Some(List.map(production_of_row_field~nt_names)rows)|Some_->Location.raise_errorf~loc"invalid extended production"in(* create functions for adding productions / deleting productions
if the 'add' or 'del' labels are omitted, then nothing is added / removed. *)letadd=Option.map_default(funadd_prs->List.appendadd_prs)identity(* do nothing when [None] *)(get_prods"add")inletdel=Option.map_default(fundel_prs->letkeepp=List.for_all(funp'->p.nppr_name<>p'.nppr_name)del_prsinList.filterkeep)identity(get_prods"del")inletprods=old_nontem.npnt_productions|>del|>addin{npnt_loc=loc;npnt_name=name;npnt_productions=prods}(* invalid nonterminal *)|{ptype_loc=loc}->Location.raise_errorf~loc"invalid nanopass type declaration form"(** convert [module_binding] into nanopass language **)letlanguage_of_module=function(* module L = struct type nt = ... end *)(* must be one single recursive type decl *)|{pmb_name={txt=lang_name};pmb_loc=loc;pmb_expr={pmod_desc=Pmod_structure[{pstr_desc=Pstr_type(Recursive,type_decls)}]}}->letnt_names=List.map(fun{ptype_name={txt}}->txt)type_declsinletnonterms=List.map(nonterm_of_type_decl~nt_names)type_declsin{npl_loc=loc;npl_name=lang_name;npl_nonterms=nonterms}(* module L = struct
include L'
type nt = ...
end *)(* must be a single include + a single recursive type decl*)|{pmb_name={txt=lang_name};pmb_loc=loc;pmb_expr={pmod_desc=Pmod_structure[{pstr_desc=Pstr_include{pincl_mod={pmod_desc=Pmod_ident{txt=Lidentext_lang_name}}}};{pstr_desc=Pstr_type(Recursive,type_decls)}]}}->(* the language we are extending *)letext_lang=find_languageext_lang_name~exn:(Location.Error(Location.errorf~loc"language %S has not been defined"ext_lang_name))in(* new nonterminal names *)letnt_names=List.map(fun{ptype_name={txt}}->txt)type_declsin(* old nonterminal names *)letnt_names'=List.map(fun{npnt_name}->npnt_name)ext_lang.npl_nontermsin(* new nonterminals *)letnonterms=List.map(nonterm_of_type_decl~extending:ext_lang~nt_names:(nt_names@nt_names'))type_declsin(* old nonterminals (only the unmodified ones) *)letnonterms'=List.filter_map(funname->ifList.memnament_namesthenNoneelseSome(language_nontermext_langname))nt_names'in{npl_loc=loc;npl_name=lang_name;npl_nonterms=nonterms@nonterms'}|{pmb_loc=loc}->Location.raise_errorf~loc"invalid nanopass language form"