1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693(* MIT License
*
* Copyright (c) 2025 Frédéric Bour
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*)(** DFA construction and analysis for LR error pattern matching
This module implements a deterministic finite automaton (DFA) construction for
analyzing failures of an LR automaton by consuming its stack.
Architecture:
- NFA module: Constructs NFA (nondeterministic finite automaton) from
regular expressions specifying error patterns. Transitions are lazy — NFA
states are only materialized when explored during determinization. Uses
[K.derive] to compute transitions, then partitions them by label equivalence
(via [IndexRefine.annotated_partition]) to merge transitions with the same
filter, captures, and usage.
- DFA module: Converts the NFA to a DFA using a modified power set
construction (ordered to respect clause priorities). This is a "power
sequence" construction, not a power set — the order of NFA states in each
kernel matters for priority resolution. Three key differences from standard
subset construction:
- NFA states in each kernel are ordered by priority
- Only paths corresponding to reachable LR stacks are determinized,
omitting transitions to unreachable configurations (automata implication)
- Branches that can never fire due to lower priority are implicitly pruned,
avoiding combinatorial state explosion
Hash-consing ensures canonical representation of equivalent DFA states.
The DFA states contain:
- A kernel of NFA states (ordered by priority)
- Transitions with mappings to relate the kernels of the source and target
state (to answer questions like which NFA state of the source an NFA
state of the target comes from?)
- Dataflow module: Performs multi-pass fixpoint analysis on the DFA:
- Reachability of branches from accepting states
- Marking of reachable transitions (usage tracking)
- Dead-code analysis and unreachable clause warnings
- Priority splits for distinguishing clause precedence
- Priority chain construction via [Order_chain] for dynamically ordering
continuations from the same branch
- Liveness of captured variables
- Defined variables at each state
- Variable class computation for register allocation
- Register allocation for captured values
Register allocation is done lazily based on live ranges. The naive greedy
allocation assigns registers according to variable classes, leading to less
efficient but more minimizable ("factorizable") code.
- Machine module: Abstract machine representation for code generation.
Contains:
- Sparse transition table with states and transitions labelled by LR(1)
- A register transfer language for implementing captures (moves, captures,
clear operations)
- Dynamic priority chain: each accepting state stores a list of
(clause, priority, registers) tuples; at runtime the first matching
clause wins. This avoids statically duplicating states for each priority
ordering, which would cause combinatorial state explosion.
- Minimization using a refinement of Valmari's algorithm with custom
decomposition by accepted actions and register transfer operations.
The [stacks] type parameterizes the DFA construction with the actual stack
topology, allowing the same construction to work over plain LR(1) states
or refined LRC states.
*)openUtilsopenMiscopenFix.IndexingopenLrgrep_supportopenInfoopenSpecopenRegexp(** Stack topology abstraction for DFA construction.
Allows the same DFA construction to work over plain LR(1) states or
refined LRC states. *)type('g,'n)stacks={domain:'ncardinal;(** Total number of stack positions. *)tops:'nindexset;(** Set of stack top positions — viable positions where the stack can end. *)prev:'nindex->'nindexset;(** For a given stack position, returns the set of predecessor positions
that can transition to it in the LR automaton. *)label:'nindex->'glr1index;(** Returns the LR(1) state associated with a stack position. *)}typepriority=intletlabel_to_short_stringglabel=ifIndexSet.equallabel(Lr1.allg)then"<any>"elseletfilter=label|>IndexSet.to_seq|>Seq.map(Lr1.to_stringg)|>List.of_seqinString.concat"|"filterletstring_of_cap(i:Capture.t)="v"^string_of_indeximoduleNFA=struct(** Nondeterministic finite automaton from regular expressions.
Transitions are lazy — NFA states are only materialized when explored
during determinization. The [make] function returns a closure over the
grammar, redgraph, and branch, producing NFA states on-demand from
continuations ([K.t]). *)type('g,'r)t={uid:int;(** Unique identifier for graph visualization. *)k:'gK.t;(** The continuation (derived regex state) represented by this NFA node. *)transitions:('gLabel.t*('g,'r)tlazy_t)list;(** Outgoing transitions, each tagged with a label (filter, captures, usage).
Targets are lazy to avoid materializing unreachable states. *)branch:('g,'r)branchindex;(** The branch (error pattern) this NFA state belongs to. *)mutablemark:unitref;(** Visitor mark for graph traversal and deduplication. *)}letis_acceptingt=matcht.kwith|K.Accept->true|_->false(** Dump NFA as a GraphViz dot file. [only_forced] controls whether to
only include transitions whose lazy targets have been forced. *)letdumpg?(only_forced=true)toc=letpfmt=Printf.fprintfocfmtinp"digraph G {\n";p" node[shape=rect];\n";lettodo=ref[]inletmark=ref()inletvisitt=ift.mark!=markthen(t.mark<-mark;pushtodot)invisitt;letprintt=p" st%d[label=%S];\n"t.uid(ifis_acceptingtthen"Accept"else"");List.iter(fun((label:_Label.t),t')->ifnotonly_forced||Lazy.is_valt'then(letlazyt'=t'inp" st%d -> st%d [label=%S];\n"t.uidt'.uid(label_to_short_stringglabel.filter^"\n"^string_of_indexset~index:string_of_caplabel.captures);visitt'))t.transitions;infixpoint~propagate:printtodo;p"}\n"letcomparet1t2=Int.comparet1.uidt2.uidletdefault_mark=ref()letuid=letk=ref0infun()->incrk;!k(** Build NFA state constructor for a given branch.
Returns a closure that, given a continuation [k], produces the
corresponding NFA state. Transitions are computed via [K.derive],
then partitioned by label equivalence using [IndexRefine.annotated_partition]
to merge transitions sharing the same filter, captures, and usage.
States are memoized using hash-consing on the continuation. *)letmake(typeg)(g:ggrammar)rgbranch=letmoduleKMap=Map.Make(structtypet=gRegexp.K.tletcompare=Regexp.K.compareend)inletnfa=refKMap.emptyinletrecauxk=matchKMap.find_optk!nfawith|Somet->t|None->letinj({Label.filter;usage;captures},t)=(filter,(usage,captures,t))inletprjfilter(usage,captures,t)=({Label.filter;usage;captures},t)inlettransitions=K.derivegrg(Lr1.allg)k|>process_transitions|>List.mapinj|>IndexRefine.annotated_partition|>List.concat_map(fun(filter,l)->List.map(prjfilter)l)inletuid=uid()inlett={uid;k;transitions;branch;mark=default_mark}innfa:=KMap.addkt!nfa;tandprocess_transitions=function|[]->[]|(label,k')::rest->(label,lazy(auxk'))::process_transitionsrestinaux(** Build NFA states for all branches in [branches].
For each branch, creates the initial NFA state from the branch's
regular expression wrapped as [K.More (re, K.Done)]. *)letfrom_branchesinforgbranches=Vector.mapi(funbrre->makeinforgbr(Regexp.K.More(re,Regexp.K.Done)))branches.exprendmoduleDFA=struct(** Mapping from target kernel positions to (source position, captures, usage).
For each position in the target state's kernel, records which position
in the source state's kernel it came from, along with the set of captures
and usages associated with that transition. *)type('src,'tgt)mapping=('tgt,'srcindex*(Capture.set*Usage.set))vector(** DFA state.
Each state has a kernel of NFA states ordered by branch priority,
a vector of branch indices, and a boolean vector marking which
kernel positions are accepting. *)type('g,'r,'dfa,'n)state={index:'dfaindex;branches:('n,('g,'r)branchindex)vector;(** Branch index for each position in the kernel. *)accepting:'nBoolvector.t;(** Which kernel positions correspond to accepting NFA states. *)mutabletransitions:('g,'r,'dfa,'n)transitionlist;(** Outgoing transitions, populated during determinization. *)}(** DFA transition with a label (set of LR1 states), a target state,
and a mapping from target kernel positions back to source positions. *)and('g,'r,'dfa,'src)transition=Transition:{label:'glr1indexset;(** Set of LR(1) states that trigger this transition. *)target:('g,'r,'dfa,'tgt)state;(** The target DFA state. *)mapping:('src,'tgt)mapping;(** Maps each target kernel position to its source kernel position
and the associated captures/usage. *)}->('g,'r,'dfa,'src)transition(** Erased-phantom packed state, used for vector storage. *)type('g,'r,'dfa)packed=Packed:('g,'r,'dfa,'n)state->('g,'r,'dfa)packed[@@ocaml.unboxed](** Complete DFA with all states, transitions, and kernel information. *)type('g,'r,'dfa)t={initial:'dfaindex;(** Index of the initial state. *)states:('dfa,('g,'r,'dfa)packed)vector;(** All DFA states indexed by their DFA index. *)domain:('dfa,'glr1indexset)vector;(** For each state, the set of LR(1) states for which there exists a
reachable stack that can reach this state. *)kernels:('dfa,('g,'r)NFA.tarray)vector;(** For each state, the array of NFA states in its kernel (ordered by priority). *)}letppdoc=letbuf=Buffer.create7inPPrint.ToBuffer.pretty0.980buf(Cmon.printdoc);String.split_on_char'\n'(Buffer.contentsbuf)letdumpgt(rg:_Redgraph.graph)oc=letpfmt=Printf.fprintfocfmtinp"digraph G {\n";p" node[shape=rect];\n";Vector.iter(fun(Packedstate)->letexprs=ref[]inletaccept=ref[]inletstepindex0=letindex=refindex0inwhilematchRedgraph.followrg!indexwith|Advanceindex'->index:=index';true|Switch_->falsedo()done;if!index=index0thencmon_indexindex0elsePrintf.ksprintfCmon.constant"%d-%d"(Index.to_int!index)(Index.to_int!index-Index.to_intindex0)inArray.iterbeginfunnfa->exprs:=List.rev_append(pp(K.cmon~stepnfa.NFA.k))!exprs;endt.kernels.:(state.index);Vector.iteribeginfunibr->ifBoolvector.teststate.acceptingithenpushacceptbrendstate.branches;p" st%d[label=\"#%d:%s\"];\n"(Index.to_intstate.index)(Index.to_intstate.index)(String.concat"\\l"@@(List.rev!exprs)@[string_concat_map","string_of_index(List.rev!accept)]);List.iter(fun(Transitiontr)->p" st%d -> st%d [label=%S];\n"(Index.to_intstate.index)(Index.to_inttr.target.index)(label_to_short_stringgtr.label^"\n"^letcaps=refIndexSet.emptyinVector.iter(fun(_,(cap,_))->caps:=IndexSet.unioncap!caps)tr.mapping;string_of_indexset~index:string_of_cap!caps);)state.transitions;)t.states;p"}\n"(** Erased-phantom existential wrapper for the DFA. *)type('g,'r)_t=T:('g,'r,'dfa)t->('g,'r)_t(** Determinize NFA branches into a DFA using modified power set construction.
Takes the grammar, error pattern branches, stack topology, and the
initial stack position. Returns a DFA where states are hash-consed
by their kernel (ordered array of NFA states). Only transitions
corresponding to reachable LR stacks are constructed. *)letdeterminize(typegrs)(g:ggrammar)(branches:(g,r)branches)(stacks:(g,s)stacks)initial:(g,r)_t=letmoduleConstruction=structincludeIndexBuffer.Gen.Make()type'nprestate={index:nindex;kernel:('n,(g,r)NFA.t)vector;accept:(g,r)branchoptindexoption;mutableraw_transitions:(glr1indexset*'nfwd_mappinglazy_t)list;}and'srcfwd_mapping=Fwd_mapping:('src,'tgt)mapping*'tgtprestate->'srcfwd_mappingtypeprepacked=Prepacked:'nprestate->prepacked[@@ocaml.unboxed]letprestates=get_generator()letcompare_kernelg1g2=array_compareNFA.compareg1g2moduleKernelMap=Map.Make(structtypet=(g,r)NFA.tarrayletcompare=compare_kernelend)letkernel_make(typea)(prj:a->(g,r)NFA.t)(ts:alist):aarray=letmark=ref()inletts=List.filter(funa->letth=prjainifth.mark!=markthen(th.mark<-mark;true)elsefalse)tsinArray.of_listtsletkernel_foldfxacc=letacc=refaccinVector.iteri(funix->acc:=fix!acc)x;!accletdfa=refKernelMap.emptyletinitial=letrecdeterminize_kernel:typen.(n,(g,r)NFA.t)vector->nprestate=funkernel->matchKernelMap.find_opt(Vector.as_arraykernel)!dfawith|Some(Prepackedt')->letRefl=assert_equal_lengthkernelt'.kernelint'|None->letaccept=refNoneinletrev_transitions=letmakei({Label.filter;captures;usage},t)=(filter,(i,(captures,usage),t))inkernel_fold(funinfaacc->ifOption.is_none!accept&&NFA.is_acceptingnfa&&Boolvector.testbranches.is_totalnfa.branchthenaccept:=Somebranches.priority.:(nfa.branch);list_rev_mappend(makei)nfa.transitionsacc)kernel[]inletprepare_target_kernel(index,captures,lazynfa)=nfa,(index,captures)inletprocess_classlabelrev_targets=label,lazy(letPackedresult=rev_targets|>List.rev_mapprepare_target_kernel|>kernel_makefst|>Vector.of_arrayinFwd_mapping((Vector.mapsndresult),determinize_kernel(Vector.mapfstresult)))inletraw_transitions=ref[]inIndexRefine.iter_merged_decompositionrev_transitions(funlabeltargets->pushraw_transitions(process_classlabeltargets));letraw_transitions=!raw_transitionsinletreservation=IndexBuffer.Gen.reserveprestatesinletstate={index=IndexBuffer.Gen.indexreservation;kernel;accept=!accept;raw_transitions;}inIndexBuffer.Gen.commitprestatesreservation(Prepackedstate);dfa:=KernelMap.add(Vector.as_arraykernel)(Prepackedstate)!dfa;stateinletVector.Packedkernel=Vector.of_array(kernel_makeFun.id(Vector.to_listinitial))in(determinize_kernelkernel).indexlet()=stopwatch3"Processed initial states"letvisited:(n,sindexset)IndexBuffer.Dyn.t=IndexBuffer.Dyn.makeIndexSet.emptyletscheduled:(n,sindexset)IndexBuffer.Dyn.t=IndexBuffer.Dyn.makeIndexSet.emptylet(.*())=IndexBuffer.Dyn.getlet(.*()<-)=IndexBuffer.Dyn.setletmin_clauset=(Vector.as_arrayt.kernel).(0).branchlet()=letaccepting=Vector.make(branch_countbranches)[]inlettodo=ref[]inletscheduleboundiset=letPrepackedtaspacked=IndexBuffer.Gen.getprestatesiinifmin_clauset<=boundthenletset=IndexSet.diffsetvisited.*(i)inifIndexSet.is_not_emptysetthen(ifIndexSet.is_emptyscheduled.*(i)then(scheduled.*(i)<-set;matcht.acceptwith|Somecwhenc<Opt.somebound->beginmatchOpt.prjcwith|Somec'->accepting.@(c')<-List.conspacked|None->()end|Some_|None->pushtodopacked)elsescheduled.*(i)<-IndexSet.unionscheduled.*(i)set)inletupdatebound(Prepackedt)=lettodo=scheduled.*(t.index)iniffalsethenPrintf.eprintf"processing#%d: %s\n"(Index.to_intt.index)(Lr1.set_to_stringg(IndexSet.mapstacks.labeltodo));visited.*(t.index)<-IndexSet.unionvisited.*(t.index)todo;scheduled.*(t.index)<-IndexSet.empty;letby_label=IndexSet.fold(funstackmap->IndexMap.update(stacks.labelstack)(union_update(stacks.prevstack))map)todoIndexMap.emptyinList.iterbeginfun(label,target)->letreally_empty=reftrueinletexpand_stacklr1=matchIndexMap.find_optlr1by_labelwith|None->IndexSet.empty|Somestacks->really_empty:=false;stacksinletstacks=IndexSet.bindlabelexpand_stackinifnot!really_emptythenletlazy(Fwd_mapping(_,t'))=targetinifIndexSet.is_not_emptystacksthenscheduleboundt'.indexstacksendt.raw_transitionsinletnext_bound=Index.rev_enumerate(branch_countbranches)inletrecloopbound=match!todowith|[]->letbound=next_bound()intodo:=accepting.:(bound);accepting.:(bound)<-[];loopbound|todo'->todo:=[];List.iter(updatebound)todo';loopboundintryletbound=next_bound()inscheduleboundinitialstacks.tops;loopboundwithIndex.End_of_set->()letprestates=IndexBuffer.Gen.freezeprestatesletdomain=Vector.initn(funi->IndexSet.mapstacks.labelvisited.*(i))endinletstates=letmake(Construction.Prepacked{index;kernel;_})=letreachable=reftrueinletaccepting=Boolvector.from_vectorkernel(funnfa->!reachable&&ifNFA.is_acceptingnfathen(ifBoolvector.testbranches.is_totalnfa.branchthenreachable:=false;true)elsefalse)inletbranches=Vector.map(funt->t.NFA.branch)kernelinPacked{index;branches;accepting;transitions=[]}inVector.mapmakeConstruction.prestatesinletfrom_prestate(typen)(p:nConstruction.prestate):(g,r,_,n)state=letPackedt=states.:(p.index)inletRefl=assert_equal_lengtht.branchesp.kernelintinVector.iteri(funi(Construction.Prepackedp)->lett=from_prestatepinletdomain=Construction.domain.:(i)int.transitions<-List.filter_map(fun(label,target)->ifLazy.is_valtargetthenletlabel=IndexSet.interlabeldomaininifIndexSet.is_not_emptylabelthenletConstruction.Fwd_mapping(mapping,target)=Lazy.forcetargetinlettarget=from_prestatetargetinSome(Transition{label;mapping;target})elseNoneelseNone)p.raw_transitions;)Construction.prestates;stopwatch3"Determinized DFA (%d states)"(cardinalConstruction.n);letkernels=Vector.makeConstruction.n(Vector.as_arrayinitial)inConstruction.KernelMap.iterbeginfun_(Construction.Prepackedst)->kernels.:(st.index)<-Vector.as_arrayst.kernelend!Construction.dfa;T{initial=Construction.initial;states;domain=Construction.domain;kernels}letstate_countdfa=Vector.lengthdfa.statesendmoduleDataflow=struct(** Multi-pass dataflow analysis on the DFA.
Computes liveness, definedness, register allocation, and priority
chains via fixpoint iteration. The analysis proceeds in passes:
1. Reachability of branches from accepting states
2. Mark reachable transitions (usage tracking)
3. Dead-code analysis and unreachable clause warnings
4. Priority splits (which positions can distinguish clause precedence)
5. Priority chain construction via [Order_chain]
6. Accepted-before computation (for pruning priority changes)
7. Liveness analysis (which captures are needed at each state)
8. Definedness analysis (which captures have been produced)
9. Variable class computation (for register allocation)
10. Register allocation (naive greedy by variable class) *)typechain=(Order_chain.element*Order_chain.element)list(** A pairing of source and target order chain elements for a transition. *)type'nvar=('n,Capture.n)Prod.ntype'n_var_classes={domain:'ncardinal;mutableclasses:'nvarindexsetlist}typevar_classes=V:'n_var_classes->var_classes[@@ocaml.unboxed](** Results of the dataflow analysis. *)type('g,'r,'dfa)t={pairings:('dfa,(('g,'r)branchindex*chain)listlist)vector;(** For each state and each outgoing transition, the priority chain
pairings between source and target order chain elements. *)accepts:('dfa,(('g,'r)branchindex*priority)list)vector;(** For each state, the list of accepted branches with their priorities. *)liveness:('dfa,Capture.setarray)vector;(** For each state and each kernel position, the set of captures that
are live (needed) from this point onward. *)defined:('dfa,Capture.setarray)vector;(** For each state and each kernel position, the set of captures that
have been defined along some path to this state. *)classes:('dfa,var_classes)vector;(** For each state, the variable classes used for register allocation. *)registers:('dfa,Register.tCapture.maparray)vector;(** For each state and each kernel position, the mapping from captures
to allocated registers. *)register_count:int;(** Total number of registers allocated across all states. *)accepted_before:('dfa,('g,'r)branchindexset)vector;(** For each state, the set of branches that have been accepted on
some path to this state. Used for pruning priority remappings. *)}letliveness(typegrdfan)(t:(g,r,dfa)t)(st:(g,r,dfa,n)DFA.state)=Vector.cast_array(Vector.lengthst.branches)t.liveness.:(st.index)letdefined(typegrdfan)(t:(g,r,dfa)t)(st:(g,r,dfa,n)DFA.state)=Vector.cast_array(Vector.lengthst.branches)t.defined.:(st.index)letregisters(typegrdfan)(t:(g,r,dfa)t)(st:(g,r,dfa,n)DFA.state)=Vector.cast_array(Vector.lengthst.branches)t.registers.:(st.index)letclasses(typegrdfan)(t:(g,r,dfa)t)(st:(g,r,dfa,n)DFA.state):nvarindexsetlist=letVvc=t.classes.:(st.index)inletRefl=assert_equal_cardinalvc.domain(Vector.lengthst.branches)invc.classes(** Reverse mapping: from a target state back to a source state and the
associated kernel mapping. Used for backward dataflow analysis. *)type('g,'r,'dfa,'tgt)rev_mapping=Rev_mapping:('g,'r,'dfa,'src)DFA.state*('src,'tgt)DFA.mapping->('g,'r,'dfa,'tgt)rev_mapping(** Packed list of reverse mappings for a DFA state. *)type('g,'r,'dfa)packed_rev_mapping=Rev_packed:('g,'r,'dfa,'n)rev_mappinglist->('g,'r,'dfa)packed_rev_mapping[@@ocaml.unboxed]letdumpgdfatoc=letpfmt=Printf.fprintfocfmtinp"digraph G {\n";p" node[shape=rect];\n";Vector.iter(fun(DFA.Packedstate)->letacc=ref[]inletlive=refIndexSet.emptyinletdef=refIndexSet.emptyinletregs=refIndexMap.emptyinletliveness=livenesststateinletdefined=definedtstateinletregisters=registerststateinletclasses=classeststateinVector.iteri(funibr->live:=IndexSet.unionliveness.:(i)!live;def:=IndexSet.uniondefined.:(i)!def;IndexMap.iter(funcapreg->regs:=IndexMap.updatereg(cons_updatecap)!regs)registers.:(i);ifBoolvector.teststate.acceptingithenpushaccbr)state.branches;p" st%d[label=%S];\n"(Index.to_intstate.index)(string_concat_map","string_of_index(List.rev!acc)^"\n"^"live: "^string_of_indexset~index:string_of_cap!live^"\n"^"defined: "^string_of_indexset~index:string_of_cap!def^"\n"^"classes: "^string_concat_map", "(funvars->string_of_indexset~index:(funvar->string_of_cap(snd(Prod.prj(Vector.lengthstate.branches)var)))vars)classes^"\n"^"registers: "^string_concat_map"; "(fun(reg,caps)->Printf.sprintf"%d: %s"(Index.to_intreg)(string_concat_map","string_of_capcaps))(IndexMap.bindings!regs));List.iter(fun(DFA.Transitiontr)->p" st%d -> st%d [label=%S];\n"(Index.to_intstate.index)(Index.to_inttr.target.index)(label_to_short_stringgtr.label^"\n"^letcaps=refIndexSet.emptyinVector.iter(fun(_,(cap,_))->caps:=IndexSet.unioncap!caps)tr.mapping;string_of_indexset~index:string_of_cap!caps);)state.transitions;)dfa.DFA.states;p"}\n"(** Reverse the DFA transition graph for backward analysis. *)letreverse_transitionsdfa=lettable=Vector.make(DFA.state_countdfa)(Rev_packed[])inVector.iterbeginfun(DFA.Packedsrc)->letprocess(DFA.Transition{target;mapping;_})=matchtable.:(target.index)with|Rev_packed[]->table.:(target.index)<-Rev_packed[Rev_mapping(src,mapping)]|Rev_packed(Rev_mapping(_,mapping0)::_asxs)->letRefl=assert_equal_lengthmappingmapping0intable.:(target.index)<-Rev_packed(Rev_mapping(src,mapping)::xs)inList.iterprocesssrc.transitionsenddfa.states;table(** Run the full dataflow analysis pipeline on a DFA.
Executes 10 passes: reachability, usage marking, dead-code analysis,
priority splits, priority chain construction, accepted-before, liveness,
definedness, variable classes, and register allocation. Returns the
complete analysis results. *)letmake(typegrdfa)branches(dfa:(g,r,dfa)DFA.t)=letreverse_transitions=reverse_transitionsdfainletiter_reverse_transitions(typen)(t:(g,r,dfa,n)DFA.state)(f:(g,r,dfa,n)rev_mapping->unit)=matchreverse_transitions.:(t.index)with|Rev_packed[]->()|Rev_packed(Rev_mapping(_,mapping0)::_asxs)->letRefl=assert_equal_lengthmapping0t.branchesinList.iterfxsinletopenstructtype'ndata={state:(g,r,dfa,'n)DFA.state;mutablereachable:'nindexset;mutablesplits:'nindexset;mutablenew_splits:'nindexset;mutablechain:('nindex*Order_chain.element)list;mutablequeued:bool;}typepacked=Packed:'ndata->packed[@@ocaml.unboxed]letdata=dfa.states|>Vector.map@@fun(DFA.Packedt)->letn=Vector.lengtht.branchesinletreachable=IndexSet.init_from_setn(Boolvector.testt.accepting)inletsplits=IndexSet.emptyinletnew_splits=IndexSet.emptyinPacked{state=t;reachable;splits;new_splits;chain=[];queued=false}letget_data(typen)(st:(g,r,dfa,n)DFA.state):ndata=letPackedsplit=data.:(st.index)inletRefl=assert_equal_lengthst.branchessplit.state.branchesinsplitendin(* First pass: compute reachable branches *)beginlettodo=ref[]inletpropagate(Packedt)=letreach=t.reachableiniter_reverse_transitionst.state@@fun(Rev_mapping(src,mapping))->lets=get_datasrcinletchanged=reffalseinIndexSet.iter(funi->letj,_=mapping.:(i)inletreach'=s.reachableinletreach''=IndexSet.addjreach'inifnot(IndexSet.equalreach'reach'')then(s.reachable<-reach'';changed:=true;))reach;if!changedthenpushtodo(Packeds)inVector.iterpropagatedata;fixpoint~propagatetodo;stopwatch3"Computed reachability";end;(* Pass 2: Mark reachable transitions *)Vector.iterbeginfun(Packedt)->letreach=t.reachableiniter_reverse_transitionst.state@@fun(Rev_mapping(_,mapping))->IndexSet.iter(funi->let_,(_,usage)=mapping.:(i)inUsage.mark_usedusage)reachenddata;(* Pass 3: Report unmarked entities *)beginletreachable_branches=letPackedt=data.:(dfa.initial)inIndexSet.map(Vector.gett.state.branches)t.reachableinletiter_ref(re:Syntax.regular_expr)=matchre.descwith|Atom_->()|Filter_->()|Repetition{expr;policy=_}->fexpr|Reduce{capture=_;mark=_;expr;policy=_}->fexpr|Alternativeres->List.iterfres|Concatres->List.iterfresinletreccheck(re:Syntax.regular_expr)=matchre.descwith|Atom(_,_,mark)|Reduce{mark;_}->ifUsage.is_unusedmarkthenSyntax.warnre.position"expression is unreachable"|_->iter_recheckreinletoverriding=Vector.make(Vector.lengthbranches.clause)IndexSet.emptyinVector.iteribeginfuni(DFA.Packedst)->letaccepting=IndexSet.init_from_set(Vector.lengthst.branches)(Boolvector.testst.accepting)|>IndexSet.map(Vector.getst.branches)inletkernel=Vector.cast_array(Vector.lengthst.branches)dfa.kernels.:(i)inVector.iteribeginfuninfa->ifNFA.is_acceptingnfa&¬(Boolvector.testst.acceptingi)thenoverriding.@(st.branches.:(i))<-IndexSet.unionacceptingendkernel;enddfa.states;Vector.iteribeginfunbranch(pattern:Syntax.pattern)->ifIndexSet.membranchreachable_branchesthencheckpattern.exprelsebeginSyntax.warnpattern.expr.position"clause is unreachable";IndexSet.iterbeginfunbranch'->Syntax.warnbranches.pattern.:(branch').expr.position"this clause is shadowing it";endoverriding.:(branch)endendbranches.patternend;stopwatch3"Dead-code analysis";(* Pass 4: Compute priority splits *)beginletcount=ref0inlettodo=ref[]inVector.iterbeginfun(Packedt)->t.new_splits<-IndexSet.init_from_set(Vector.lengtht.state.branches)(Boolvector.testt.state.accepting);ifIndexSet.is_not_emptyt.new_splitsthenpushtodo(Packedt);enddata;letschedule(typen)(t:ndata)(splits:nindexset)=letsplits=IndexSet.diffsplitst.splitsinifIndexSet.is_emptysplitsthen()elseifIndexSet.is_emptyt.new_splitsthen(incrcount;pushtodo(Packedt);t.new_splits<-splits;)elset.new_splits<-IndexSet.uniont.new_splitssplitsinletrecschedule_one:typen.ndata->nindexset->unit=fun(typen)(t:ndata)(splits:nindexset)->letsplits=IndexSet.diffsplitst.splitsinifIndexSet.is_emptysplitsthen()elseifIndexSet.is_emptyt.new_splitsthen(t.new_splits<-splits;propagate(Packedt))elset.new_splits<-IndexSet.uniont.new_splitssplitsandpropagate(Packedsrc)=letnew_splits=src.new_splitsinsrc.new_splits<-IndexSet.empty;src.splits<-IndexSet.unionsrc.splitsnew_splits;letnew_splits=IndexSet.elementsnew_splitsinletrecmap_onemappingtgtixxs=letn=Array.lengthmappinginifi>=nthenIndexSet.emptyelseletx',_=mapping.(i)inifx'<xthenmap_onemappingtgt(i+1)xxselseletbranch=src.state.branches.:(x)inletacc=map_splitsmappingtgt(i+1)xsinifIndex.equalsrc.state.branches.:(x')branchthenIndexSet.add(Index.of_int(Vector.lengthtgt.DFA.branches)i)accelseaccandmap_splitsmappingtgti=function|[]->IndexSet.empty|x::xs->map_onemappingtgtixxsinmatchsrc.state.transitionswith|[]->()|[DFA.Transition{mapping;target;_}]->schedule_one(get_datatarget)(map_splits(Vector.as_arraymapping)target0new_splits)|xs->List.iterbeginfun(DFA.Transition{mapping;target;_})->schedule(get_datatarget)(map_splits(Vector.as_arraymapping)target0new_splits)endxsinfixpoint~propagatetodo;stopwatch3"computed priority splits (%d refinements)"!countend;(* Pass 5: Construct priority chain and remapping *)letchain=Order_chain.make()inletpairings=Vector.make(DFA.state_countdfa)[]inbeginletgroup_by_brancht=function|[]->[]|(i,_)asx::xs->letrecloopbranchaccaccs=function|[]->List.rev((branch,List.revacc)::accs)|(i,_)asx::xs->letbranch'=t.DFA.branches.:(i)inifbranch=branch'thenloopbranch(x::acc)accsxselseloopbranch'[x]((branch,List.revacc)::accs)xsinloopt.branches.:(i)[x][]xsinletrecchain_next_splitielement=function|(i',element')::rest->letc=Index.comparei'iinifc<0thenchain_next_splitielement'restelseifc=0then(element',rest)else(Order_chain.extendelement,rest)|[]->(Order_chain.nextelement,[])inletchain_processed=Boolvector.make(DFA.state_countdfa)falseinletroot=Order_chain.rootchaininletPackedinitial=data.:(dfa.initial)ininitial.chain<-(matchIndexSet.elementsinitial.splitswith|[]->[]|splits->letrecfresh_chainbranchelement=function|[]->[]|m::ms->letbranch'=initial.state.branches.:(m)inletelement=ifIndex.equalbranchbranch'thenOrder_chain.nextelementelserootin(m,element)::fresh_chainbranch'elementmsinfresh_chain(Index.of_int(branch_countbranches)0)rootsplits);Boolvector.setchain_processeddfa.initial;letdirect_transitions=ref0inletshared_transitions=ref0inlettrivial_pairing=ref0inletnontrivial_pairing=ref0inlettransitions_with_pairing=ref0inletprocess_direct_transitionsrcmappingtgt=assert(not(Boolvector.testchain_processedtgt.state.DFA.index));incrdirect_transitions;letsbranches=src.state.branchesinlettbranches=tgt.state.branchesinletrecextract_branchbranchacc=function|(n,_)asx::xswhenIndex.equalsbranches.:(n)branch->extract_branchbranch(x::acc)xs|rest->List.revacc,restinletrecseek_branchbranch=function|[]->[],[]|((n,_)asx::xs)asxxs->letc=Index.comparesbranches.:(n)branchinifc<0thenseek_branchbranchxselseifc=0thenextract_branchbranch[x]xselse([],xxs)inletrecprocess_splitschain=function|[]->[]|m::ms->letbranch=tbranches.:(m)inletchain,rest=seek_branchbranchchaininprocess_branchbranchchainrestmmsandprocess_branchbranchchainrestmms=leti,_=mapping.:(m)inletsplit,chain=chain_next_splitirootchainin(m,split)::process_continue_branchbranchchainrestmsandprocess_continue_branchbranchchainrest=function|m::mswhenIndex.equaltbranches.:(m)branch->process_branchbranchchainrestmms|ms->process_splitsrestmsintgt.chain<-process_splitssrc.chain(IndexSet.elementstgt.splits);Boolvector.setchain_processedtgt.state.indexinletprocess_shared_transitionsrcmappingtgt=incrshared_transitions;assert(Boolvector.testchain_processedsrc.state.index);assert(Boolvector.testchain_processedtgt.state.index);letsrc_chain=group_by_branchsrc.statesrc.chaininlettgt_chain=group_by_branchtgt.statetgt.chaininletrecfind_elementielement=function|[]->element,[]|(i',element')::xsasxxs->if(i':_index)>ithenelement,xxselsefind_elementielement'xsinletrecpair_elementssrc_elements=function|[]->[]|(i,tgt_element)::rest->letsrc_element,src_elements=find_element(fstmapping.:(i))rootsrc_elementsinlettl=pair_elementssrc_elementsrestinifsrc_element==tgt_elementthen(incrtrivial_pairing;tl)else(incrnontrivial_pairing;(src_element,tgt_element)::tl)inletrecprocess_tgtclauseelementsnext=function|(clause',_)::restwhencompare_indexclause'clause<0->process_tgtclauseelementsnextrest|(clause',elements')::restwhenequal_indexclauseclause'->lettl=process_nextrestnextinbeginmatchpair_elementselements'elementswith|[]->tl|hd->(clause,hd)::tlend|src_chain->process_nextsrc_chainnextandprocess_nextsrc_chain=function|[]->[]|(clause,elements)::next->process_tgtclauseelementsnextsrc_chaininprocess_nextsrc_chaintgt_chaininletvisitacc(_,Packedsrc)=assert(Boolvector.testchain_processedsrc.state.index);letacc=refaccinletprocess_transition(DFA.Transition{label;target;mapping;_})=lettgt=get_datatargetinletpairing=ifBoolvector.testchain_processedtarget.indexthenprocess_shared_transitionsrcmappingtgtelse(process_direct_transitionsrcmappingtgt;pushacc(label,Packedtgt);[])inifnot(list_is_emptypairing)thenincrtransitions_with_pairing;pairinginletpairings'=List.mapprocess_transitionsrc.state.transitionsinpairings.:(src.state.index)<-pairings';!accinletrecloop=function|[]->()|xs->loop(List.fold_leftvisit[](List.sort(fun(l1,_)(l2,_)->IndexSet.comparel1l2)xs))inloop(visit[]((),Packedinitial));stopwatch3"constructed order chain with %d elements \
(%d direct transitions, %d shared, %d trivial pairings, \
%d non-trivial pairings, %d transitions with pairings)"(Order_chain.freezechain)!direct_transitions!shared_transitions!trivial_pairing!nontrivial_pairing!transitions_with_pairing;end;(* Pass 6: Collect accepted branches and their priority level *)letaccepts=data|>Vector.map@@fun(Packedt)->letremainder=reft.chaininletaccepting=t.state.acceptinginletbranches=t.state.branchesinletrecloopielement=function|(i',element')::restwhenIndex.comparei'i<=0&&Index.equalbranches.:(i')branches.:(i)->loopielement'rest|rest->remainder:=rest;elementinletget_elementi=loopi(Order_chain.rootchain)!remainderinletacc=ref[]inlettest_branchiindex=ifBoolvector.testacceptingithenpushacc(index,Order_chain.evaluate(get_elementi))inVector.iteritest_branchbranches;List.rev!accin(* Worklist on states for computing fixed points *)lettodo=ref[]inletschedulest=ifnotst.queuedthen(st.queued<-true;pushtodo(Packedst);)inletget(typen)v(st:(_,_,_,n)DFA.state):(n,Capture.set)vector=Vector.cast_array(Vector.lengthst.branches)v.:(st.index)in(* Pass 6b: Accepted before, for pruning priority changes *)letaccepted_before=Vector.map(funxs->IndexSet.of_list(List.mapfstxs))acceptsinlet()=letpropagate(Packedsrc)=assertsrc.queued;src.queued<-false;letmax_clauset=letarr=Vector.as_arrayt.DFA.branchesinarr.(Array.lengtharr-1)inletdef_src=accepted_before.:(src.state.index)inletdef_min=Option.get(IndexSet.minimumdef_src)inList.iterbeginfun(DFA.Transition{target;_})->letmax_clause=max_clausetargetinletdef_tgt=accepted_before.:(target.index)inletdef_tgt'=IndexSet.fused_inter_uniondef_src(IndexSet.init_intervaldef_minmax_clause)~acc:def_tgtinifdef_tgt'!=def_tgtthen(accepted_before.:(target.index)<-def_tgt';schedule(get_datatarget))endsrc.state.transitionsinfixpoint~propagatetodo;stopwatch3"Computed accepted-before";inletliveness,defined=(* Pass 7: Compute liveness of variables (Section 4.4.1, Definition 16) *)letliveness=dfa.states|>Vector.map@@fun(DFA.Packedst)->letimmediate=st.branches|>Vector.mapi@@funibr->ifBoolvector.testst.acceptingithen(schedule(get_datast);branches.br_captures.:(br))elseIndexSet.emptyinVector.as_arrayimmediateinletpropagate(Packedtgt)=asserttgt.queued;tgt.queued<-false;letlive_tgt=getlivenesstgt.stateiniter_reverse_transitionstgt.statebeginfun(Rev_mapping(src,mapping))->letchanged=reffalseinletlive_src=getlivenesssrcinletsrc=get_datasrcinletprocess_mappingtgt_j(src_i,(captures,_usage))=letsuccessors=IndexSet.difflive_tgt.:(tgt_j)capturesinletlive=live_src.:(src_i)inletlive'=IndexSet.unionsuccessorsliveiniflive'!=livethen(live_src.:(src_i)<-live';changed:=true;)inVector.iteriprocess_mappingmapping;if!changedthenschedulesrcend;infixpoint~propagatetodo;stopwatch3"Computed liveness";(* Pass 8: Compute defined variables (Section 4.4.1, Definition 17) *)letdefined=dfa.states|>Vector.map@@fun(DFA.Packedtgt)->letlive=getlivenesstgtinletresult=Vector.make(Vector.lengthlive)IndexSet.emptyiniter_reverse_transitionstgtbeginfun(Rev_mapping(_src,mapping))->letprocess_mappingtgt_j(_,(captures,_usage))=letcaptures=IndexSet.interlive.:(tgt_j)capturesinresult.@(tgt_j)<-IndexSet.unioncapturesinVector.iteriprocess_mappingmapping;end;ifVector.existsIndexSet.is_not_emptyresultthenschedule(get_datatgt);Vector.as_arrayresultinletpropagate(Packedsrc)=assertsrc.queued;src.queued<-false;letdef_src=getdefinedsrc.stateinList.iterbeginfun(DFA.Transition{target;mapping;_})->letchanged=reffalseinletlive_tgt=getlivenesstargetinletdef_tgt=getdefinedtargetinletprocess_mappingtgt_j(src_i,(_captures,_usage))=letdef=def_tgt.:(tgt_j)inletdef'=IndexSet.union(IndexSet.interdef_src.:(src_i)live_tgt.:(tgt_j))definifdef!=def'then(changed:=true;def_tgt.:(tgt_j)<-def')inVector.iteriprocess_mappingmapping;if!changedthenschedule(get_datatarget)endsrc.state.transitionsinfixpoint~propagatetodo;stopwatch3"Computed defined";(liveness,defined)in(* Pass 9: Classes *)letclasses=letlift_classdomainicaps=IndexSet.map(Prod.injdomaini)capsinletclasses=Vector.mapi(funidef->letVector.Packedv=Vector.of_arraydefinletdomain=Vector.lengthvinletvc=Vector.fold_righti(funicaps->IndexSet.union(lift_classdomainicaps))vIndexSet.emptyinletPackedst=data.:(i)inletclasses=ifIndexSet.is_emptyvcthen[]else(schedulest;[vc])inV{domain=Vector.lengthv;classes})definedinletget_classes(typen)(st:(_,_,_,n)DFA.state):nvarindexsetlist=letV{domain;classes}=classes.:(st.index)inletRefl=assert_equal_cardinaldomain(Vector.lengthst.branches)inclassesinletset_classes(typen)(st:(_,_,_,n)DFA.state)(vc:nvarindexsetlist)=letVv=classes.:(st.index)inifList.compare_lengthsv.classesvc<>0thenletRefl=assert_equal_cardinalv.domain(Vector.lengthst.branches)inschedule(get_datast);v.classes<-vcinletpropagate(Packedsrc)=assertsrc.queued;src.queued<-false;letsdomain=Vector.lengthsrc.state.branchesinletvc'=get_classessrc.stateinList.iterbeginfun(DFA.Transition{target;mapping;_})->letvc=get_classestargetinlettdomain=Vector.lengthtarget.branchesinletdefined=getdefinedtargetinletrmap=Vector.makesdomainNoneinletcaps=refIndexSet.emptyinVector.rev_iteri(funtgt_j(src_i,(caps',_))->rmap.:(src_i)<-Sometgt_j;letcaps'=IndexSet.interdefined.:(tgt_j)caps'incaps:=IndexSet.union(lift_classtdomaintgt_jcaps')!caps;)mapping;letcaps=!capsinletvc'=List.map(funset->IndexSet.filter_map(funv->leti,j=Prod.prjsdomainvinmatchrmap.:(i)with|Somei'whenIndexSet.memjdefined.:(i')->letv'=Prod.injtdomaini'jinifIndexSet.memv'capsthenNoneelseSomev'|_->None)set)vc'inset_classestarget(IndexRefine.partition(caps::vc@vc'));endsrc.state.transitionsinfixpoint~propagatetodo;stopwatch3"Computed classes";classesin(* Pass 10: (Naive) register allocation *)letregisters:(dfa,Register.tCapture.maparray)vector=defined|>Vector.mapi@@funidef->letVector.Packedlive=Vector.of_arraydefinletdomain=Vector.lengthliveinletVvc=classes.:(i)inletRefl=assert_equal_cardinalvc.domaindomaininletresult=Vector.makedomainIndexMap.emptyinList.iteri(funregvars->letreg=Register.of_intreginIndexSet.iter(funvar->leti,cap=Prod.prjdomainvarinresult.@(i)<-IndexMap.addcapreg)vars;)vc.classes;Vector.as_arrayresultinletregister_count=letmax_live=ref0inletmax_index=ref(-1)inletcheck_state(DFA.Packedstate)=letregs=registers.:(state.index)inletmax_live'=Array.fold_left(funsummap->sum+IndexMap.cardinalmap)0regsinmax_live:=max!max_livemax_live';Array.iter(IndexMap.iter(fun_reg->max_index:=max!max_index(Index.to_intreg)))regs;inVector.itercheck_statedfa.states;stopwatch3"allocated registers (max live variables: %d, register count: %d)"!max_live(!max_index+1);!max_index+1in(* Collect results *){pairings;accepts;register_count;liveness;defined;classes;registers;accepted_before}endmoduleMachine=struct(** Bytecode representation of the automaton for code generation.
The machine is a sparse transition table with a register transfer
language. Transitions carry labels with filters, captures, register
moves, clears, and dynamic priority remappings. *)type('g,'r)label={filter:'glr1indexset;(** The set of LR(1) states that allow this transition to be taken. *)captures:(Capture.t*Register.t)list;(** Variables to capture and the register in which to store them
when the transition is taken. *)clear:Register.set;(** Registers to clear when the transition is taken (for captures
that go out of scope or are undefined). *)moves:Register.tRegister.map;(** Register-to-register transfers when taking this transition.
Keys are source registers, values are target registers. *)priority:(('g,'r)branchindex*priority*priority)list;(** Dynamic priority remappings for clause precedence.
An element (c, p1, p2) means that a match of clause [c] at
priority [p1] in the source state corresponds to a match at
priority [p2] in the target state. *)}letlabel_comparet1t2=letc=IndexSet.comparet1.filtert2.filterinifc<>0thencelseletc=List.compare(compare_paircompare_indexcompare_index)t1.capturest2.capturesinifc<>0thencelseletc=IndexMap.comparecompare_indext1.movest2.movesinifc<>0thencelseletc=IndexSet.comparet1.cleart2.clearinifc<>0thencelse(* Compare t1.priority vs t2.priority. Two transitions with identical
filter/captures/moves/clear but different priority remappings would
be considered equal, potentially causing incorrect DFA minimization. *)letcompare_priority(b1,s1,t1)(b2,s2,t2)=letc=Index.compareb1b2inifc<>0thencelseletc=Int.compares1s2inifc<>0thencelseInt.comparet1t2inList.comparecompare_priorityt1.priorityt2.priority(** The machine representation for code generation.
A sparse transition table with register transfer operations.
Parameterized by:
- ['g] is the grammar (input)
- ['r] is the set of rules (input)
- ['st] is the set of states (output)
- ['tr] is the set of transitions (output) *)type('g,'r,'st,'tr)t={initial:'stindexoption;(** Index of the initial state, or [None] if there are no viable patterns. *)source:('tr,'stindex)vector;(** For each transition, the source state index. *)target:('tr,'stindex)vector;(** For each transition, the target state index. *)label:('tr,('g,'r)label)vector;(** For each transition, its label (filter, captures, moves, clear, priority). *)unhandled:('st,'glr1indexset)vector;(** For each state, the set of LR(1) states for which stacks can reach
this state but no transition is defined. These should be rejected
at runtime. *)outgoing:('st,'trindexset)vector;(** For each state, the set of outgoing transition indices. *)accepting:('st,(('g,'r)branchindex*priority*Register.tCapture.map)list)vector;(** For each state, the list of clauses accepted when reaching that state.
Each clause comes with a priority level and a register mapping indicating
where captured variables can be found. The first matching clause wins. *)branches:('st,(('g,'r)branchindex*bool*Register.tCapture.map)list)vector;(** For each state, the list of clauses being recognized in that state.
Each entry is (branch index, is_accepting, register mapping). *)register_count:int;(** Total number of registers used across all states. *)partial_captures:Capture.set;(** Set of captures that may be only partially defined (some paths define
them, others don't). *)}type('g,'r)_t=T:('g,'r,'st,'tr)t->('g,'r)_tletdumpgtoc=letpfmt=Printf.fprintfocfmtinp"digraph G {\n";p" node[shape=rect];\n";Vector.iteri(funstaccept->letaccept=List.map(fun(br,_,captures)->string_of_indexbr^"["^string_concat_map","(fun(cap,reg)->string_of_capcap^" = !"^string_of_indexreg)(IndexMap.bindingscaptures)^"]")acceptinp" st%d[label=%S];\n"(Index.to_intst)(String.concat","accept);)t.accepting;Vector.iteri(funtrlabel->p" st%d -> st%d [label=%S];\n"(Index.to_intt.source.:(tr))(Index.to_intt.target.:(tr))(label_to_short_stringglabel.filter^"\n"^String.concat"\n"(List.map(fun(src,dst)->string_of_indexdst^" <- "^string_of_indexsrc)(IndexMap.bindingslabel.moves)@[string_concat_map", "(fun(cap,reg)->string_of_capcap^" = !"^string_of_indexreg)label.captures]));)t.label;p"}\n"(** Minimize the DFA and produce the final machine representation.
Converts the DFA with dataflow analysis results into a compact machine
with sparse transition tables and register transfer language. Uses a
refinement of Valmari's algorithm with custom decomposition:
- States are refined by accepted actions (clauses and priorities)
- Transitions are grouped by LR(1) filter and by register operations
Returns [None] for the initial state if no patterns are viable. *)letminimize(typegrdfa)(branches:(g,r)branches)(dfa:(g,r,dfa)DFA.t)(dataflow:(g,r,dfa)Dataflow.t)=letpartial_captures=refIndexSet.emptyinletmoduleTransition=structtypet={source:dfaindex;target:dfaindex;label:(g,r)label;}openIndexBufferincludeGen.Make()letvector=letgen=get_generator()inletprocess_transitionsourcesrc_regs(DFA.Transition{label=filter;mapping;target;_})pairings=lettgt_regs=Dataflow.registersdataflowtargetinletcaptures=ref[]inletmoves=refIndexMap.emptyinletclear=refIndexSet.emptyinletprocess_mapping(src_i,(captured,_usage))tgt_bank=letsrc_bank=src_regs.:(src_i)inletprocess_tgt_regcapturetgt_reg=ifIndexSet.memcapturecapturedthenpushcaptures(capture,tgt_reg)elsematchIndexMap.find_optcapturesrc_bankwith|Somesrc_reg->ifsrc_reg<>tgt_regthenmoves:=IndexMap.addsrc_regtgt_reg!moves|None->partial_captures:=IndexSet.addcapture!partial_captures;clear:=IndexSet.addtgt_reg!clearinIndexMap.iterprocess_tgt_regtgt_bankinVector.iter2process_mappingmappingtgt_regs;letcaptures=!capturesandmoves=!movesandclear=!clearinletaccepted_before=dataflow.accepted_before.:(source)inletpriority=List.concat_map(fun(branch,pairs)->ifIndexSet.membranchaccepted_beforethenList.map(fun(p1,p2)->branch,Order_chain.evaluatep1,Order_chain.evaluatep2)pairselse[])pairingsinletlabel={filter;captures;moves;clear;priority}inignore(Gen.addgen{source;target=target.index;label})inletprocess_state(DFA.Packedsource)pairings=List.iter2(process_transitionsource.index(Dataflow.registersdataflowsource))source.transitionspairingsinVector.iter2process_statedfa.statesdataflow.pairings;Gen.freezegenendinletpartial_captures=letacc=!partial_capturesinVector.fold_leftbeginfunacc(DFA.Packedst)->Vector.fold_lefti2beginfunacciindexregs->ifBoolvector.testst.acceptingithenletcap=branches.br_captures.:(index)inIndexSet.foldbeginfunvaracc->ifIndexMap.memvarregsthenaccelseIndexSet.addvaraccendcapaccelseaccendaccst.branches(Dataflow.registersdataflowst)endaccdfa.statesinletmoduleMin=Valmari.Minimize_with_custom_decomposition(structtypestates=dfaletstates=DFA.state_countdfatypetransitions=Transition.nlettransitions=Transition.ntype[@ocaml.warning"-34"]nonreclabel=(g,r)labelletlabeli=Transition.vector.:(i).labelletsourcei=Transition.vector.:(i).sourcelettargeti=Transition.vector.:(i).targetletinitialsf=fdfa.initialletfinalsf=Vector.iteri(funindexaccepts->matchacceptswith|[]->()|_::_->findex)dataflow.acceptslet[@ocaml.warning"-32"]refinementsrefine=(* Refine states by accepted actions *)lettable=Hashtbl.create7inVector.rev_iteri(funindexaccepts->matchacceptswith|[]->()|_::_->matchHashtbl.find_opttableacceptswith|None->Hashtbl.addtableaccepts(ref(IndexSet.singletonindex))|Somer->r:=IndexSet.addindex!r)dataflow.accepts;Hashtbl.iter(fun_r->refine(fun~add->IndexSet.iteradd!r))tablelet[@ocaml.warning"-32"]decompositionrefine=letacc=ref[]inletactions=ref[]inIndex.itertransitions(funtr->letlabel=labeltrinpushacc(label.filter,tr);iflabel.captures<>[]||IndexSet.is_not_emptylabel.clear||not(IndexMap.is_emptylabel.moves)thenpushactions({labelwithfilter=IndexSet.empty},tr););IndexRefine.iter_decomposition!acc(fun_setiter->refine(fun~add->iteradd));letactions=List.sort(fun(l1,_)(l2,_)->label_comparel1l2)!actionsinletrecgroup_actionslks=function|(l',k)::restwhenlabel_comparell'=0->group_actionsl(k::ks)rest|rest->refine(fun~add->List.iteraddks);startrestandstart=function|[]->()|(l,k)::rest->group_actionsl[k]restinstartactionsend)inletinitial=ifArray.lengthMin.initials=0thenNoneelseSomeMin.initials.(0)inletsource=Vector.initMin.transitionsMin.sourceinlettarget=Vector.initMin.transitionsMin.targetinletlabel=Vector.initMin.transitionsMin.labelinletaccepting=Vector.initMin.states@@funstate->letDFA.Packedsource=dfa.states.:(Min.represent_statestate)inletpriorities=refdataflow.accepts.:(source.index)inletget_priorityclause=match!prioritieswith|(clause',p)::rest->ifnot(Index.equalclauseclause')then(Printf.eprintf"Accepting clause %d but got priority for clause %d?!\n"(Index.to_intclause)(Index.to_intclause');assertfalse)elseiffalsethenPrintf.eprintf"Accepting clause %d with priority %d\n"(Index.to_intclause)p;priorities:=rest;p|[]->assertfalseinletadd_acceptingacciindexregs=ifBoolvector.testsource.acceptingithen(index,get_priorityindex,regs)::accelseaccinletregisters=Dataflow.registersdataflowsourceinList.rev(Vector.fold_lefti2add_accepting[]source.branchesregisters)inletbranches=Vector.initMin.states@@funstate->letDFA.Packedsource=dfa.states.:(Min.represent_statestate)inletadd_branchibranchregsacc=(branch,Boolvector.testsource.acceptingi,regs)::accinletregisters=Dataflow.registersdataflowsourceinVector.fold_righti2add_branchsource.branchesregisters[]inletoutgoing=Vector.makeMin.statesIndexSet.emptyinletunhandled=Vector.makeMin.statesIndexSet.emptyin(* Initialize unhandled with all reachable labels *)Index.iter(DFA.state_countdfa)beginfunst->matchMin.transport_statestwith|None->()|Someindex->unhandled.@(index)<-IndexSet.uniondfa.domain.:(st)end;(* Remove the ones for which transitions exist.
Populate outgoing. *)Index.rev_iterMin.transitionsbeginfuntr->letindex=Min.sourcetrinletlabel=Min.labeltrinletvisited=Vector.getunhandledindexinletvisited=IndexSet.diffvisitedlabel.filterinVector.setunhandledindexvisited;outgoing.@(index)<-IndexSet.addtrend;stopwatch3"OutDFA";T{initial;source;target;label;unhandled;outgoing;partial_captures;register_count=dataflow.register_count;accepting;branches}letstatest=Vector.lengtht.outgoingend