123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583(* MIT License
*
* Copyright (c) 2025 Frédéric Bour
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
*
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*)(** Reduction graph construction and analysis
This module builds and manipulates a graph of viable reductions in an LR(1)
parser. The reduction graph represents the structure of possible reductions
as paths through the automaton, enabling efficient lookahead analysis and
priority computation.
Proceeds in three steps:
- Compute the closure ϵ-reductions (reductions that do not consume any
input token) for each LR(1) state.
This analysis is local (it does not depend on the stack, only on the
LR(1) state), and forms a tree of possible sequences of ϵ-reductions,
ending with optional "pending", non-ϵ, reductions that need to consume
states from the stack to proceed.
This closure is represented by stack_tree's and reduction_closure's, and
simplifies and speeds up later analyses.
- Build a target trie that maps reduction targets (e.g., a nonterminal to
reduce) to the goto transitions where they can occur, enabling reverse
lookup from user-specified patterns to graph nodes.
- Construct a graph whose edges are labelled by LR(1) states and which map
an LR(1) stack suffix to the (sequences of) reductions applicable to
this configuration.
The paths of this graph enumerate all the stack suffixes that can be
consumed by reducing. The process is repeated as long as a reduction is
applicable, thus a right recursion [A → α A] translates to a cycle.
The process also keeps track of lookahead symbols permitting each
reduction to strictly simulate the behavior of an LR(1) automaton that
possibly went through conflict resolution.
But to recognize a reduction pattern, we have to do the reverse mapping:
the user provides the target of a reduction (e.g. I want to reduce an
expression), and we need to find the paths that can reach this target.
So we introduce a "target" abstraction to which a reduction pattern
translates to, a reverse index [target_trie] to go from a pattern to a
set of targets, and we associate to each node of the graph the reachable
targets.
Architecture:
- The graph nodes ("cells") represent configurations of (LR state, reduction
position, lookahead set). These are the vertices of the reduction graph.
- Edges represent transitions: moving from one reduction position to another
via goto transitions.
- The graph is minimization-aware: Valmari's algorithm is used to minimize
the graph while preserving the reachability structure needed for
computation of minimal costs.
Key data structures:
- 'g stack_tree: Represents the tree of possible reduction stacks for a
given LR state. Each node contains:
- [next]: Subtrees reachable after performing a reduction
- [reductions]: Pending non ϵ-reductions at each node, grouped by depth
- 'g reduction_closure: Complete ϵ-reductions information for an LR state
- [accepting], [failing]: Lookaheads that cause acceptance/failure
- [stacks]: Stack trees of ϵ-reductions
- [all_stacks], [all_reductions]: flattened ϵ-stacks and ϵ-reductions
- 'g target_trie: Trie for indexing reduction targets reached by sequences of LR(1) states.
E.g. if there is a goto transition `s0 -> s1` labelled `expression`, there will be
a path `s0 -> s1` labelled `expression target` in the trie.
- [sub]: Child nodes for each LR state
- [immediates]: States from which the reductions are immediate (ϵ-reductions by definition)
- [targets]: Targets reached by the current prefix.
- 'g graph: The minimized reduction graph, where each cell contains the
reductions applicable at that position, and each step contains the
transitions from that cell.
Tricky implementation details:
- The reduction graph is used to compute lookahead-dependent reduction
sequences. Each cell represents either a (state, lookahead) configuration,
or an intermediate step in a reduction sequence given by a triple (state,
depth, lookahead) (a non-deterministic transition which applies if
`state` is `depth` states deep in the stack).
- The [group_reductions] function groups items being reduced by their depth
in the stack, enabling efficient processing of nested reductions.
- The [index_targets] function creates a trie where each path corresponds
to a sequence of goto transitions leading to a target state. The trie
nodes mark "immediate" targets (directly reachable via reductions) and
track transitions via goto.
- The reduction graph construction uses a stream-based approach for
accessing predecessors, implemented via [get_stream] to avoid
recomputing them.
- The minimization via Valmari's algorithm preserves the reachability
structure needed for cost computation while reducing state space.
- The [step] type represents positions in the reduction graph, and
[cells_steps] maps each cell to its step index for efficient cost
computation.
- The [filter_reductions] function updates reduction lookahead sets when
the lookahead domain is restricted to preserve LR(1) behaviors.
- The [follow] function returns either an [Advance] (move to next step)
or a [Switch] (transition to different goto targets), enabling the
parser to navigate the reduction graph.
*)openFix.IndexingopenUtilsopenMiscopenInfo(*let printf_debug = false*)(* Merge reduction steps: combine reductions at the same depth, recursing on deeper levels. *)letrecmerge_reduction_stepmapacc=function|[]->(map,acc)|[]::rrs->merge_reduction_stepmapaccrrs|(r::rs)::rrs->letacc=iflist_is_emptyrsthenaccelsers::accinletaugment_ab=Some(IndexSet.unionab)inletmap=IndexMap.unionaugmentrmapinmerge_reduction_stepmapaccrrs(* Recursively merge all reduction depth layers, dropping empty ones. *)letrecmerge_reductions=function|[]->[]|rrs->letr,rrs'=merge_reduction_stepIndexMap.empty[]rrsinmatchmerge_reductionsrrs'with|[]whenIndexMap.is_emptyr->[]|rs->r::rs(* Step 1: pre-compute closure of ϵ-reductions *)(* Group items being reduced by their depth (reductions with zero, one, two producers, ...). *)letgroup_reductionsg=function|[]->[]|items->letrecgroupdepthacc=function|[]->[acc]|(it,la)::restwhendepth=Item.positiongit->letlhs=Production.lhsg(Item.productiongit)ingroupdepth(IndexMap.updatelhs(union_updatela)acc)rest|otherwise->acc::group(depth+1)IndexMap.emptyotherwiseinletcompare_items(it1,_)(it2,_)=Int.compare(Item.positiongit1)(Item.positiongit2)ingroup0IndexMap.empty(List.sortcompare_itemsitems)(* Verify that a list of reduction maps is non-empty at the deepest level. *)letrecvalidate=function|[]->true|[x]->not(IndexMap.is_emptyx)|_::xs->validatexstype'gstack_tree={next:('glr1indexlist*'gterminalindexset*'gstack_tree)list;reductions:('gnonterminal,'gterminalindexset)indexmaplist;}(* Fold over all reduction maps in a stack tree, traversing all branches. *)letfold_stack_reductionsfstacksacc=letrecauxacc{next;reductions}=letacc=freductionsaccinList.fold_leftaux_nextaccnextandaux_nextacc(_,_,stacks')=auxaccstacks'inauxaccstackstype'greduction_closure={accepting:'gterminalindexset;failing:'gterminalindexset;stacks:'gstack_tree;all_stacks:('glr1indexlist*'gterminalindexset)list;all_reductions:('gnonterminal,'gterminalindexset)indexmaplist;}type('g,'n)reduction_closures=('n,'greduction_closure)vector(* Add the intersection of [set] and [la] to the reference [r]. *)letadd_subsetgrsetla=r:=IndexSet.union(Terminal.intersectgsetla)!r(* Close ϵ-reductions of each LR(1) states *)letclose_lr1_reductions(typeg)(g:ggrammar):(glr1,greduction_closure)vector=Vector.init(Lr1.cardinalg)@@funlr1->letaccepting=refIndexSet.emptyinletfailing=refIndexSet.emptyinletgroup_stacks(items,next)=letreductions=group_reductionsgitemsinassert(validatereductions);{reductions;next}inletrecpoplookaheadacc(item:gitemindex)=function|[]->((item,lookahead)::fstacc,sndacc)|hd::tlasstack->matchItem.prevgitemwith|Someitem'->poplookaheadaccitem'tl|None->letlhs=Production.lhsg(Item.productiongitem)inletstack=Transition.find_goto_targetghdlhs::stackinletstacks=group_stacks(reducelookahead([],[])stack)in(fstacc,(stack,lookahead,stacks)::sndacc)andreducelookaheadaccstack=letlr1=List.hdstackinadd_subsetgfailing(Lr1.rejectglr1)lookahead;add_subsetgaccepting(Lr1.shift_onglr1)lookahead;IndexSet.foldbeginfunredacc->matchTerminal.intersectg(Reduction.lookaheadsgred)lookaheadwith|lawhenIndexSet.is_emptyla->acc|la->poplaacc(Item.lastg(Reduction.productiongred))stackend(Reduction.from_lr1glr1)accinletstacks=group_stacks(reduce(Terminal.allg)([],[])[lr1])inletfailing=!failinginletaccepting=!acceptinginletrecall_stackslaacc{next;_}=List.fold_left(funacc(stack,la',stacks)->letla=IndexSet.interlala'inifIndexSet.is_emptylathenaccelseall_stacksla((stack,la)::acc)stacks)accnextinletall_stacks=all_stacks(Terminal.allg)[([lr1],Terminal.allg)]stacksinletall_reductions=merge_reductions(fold_stack_reductionsList.consstacks[])inassert(validateall_reductions);{accepting;failing;stacks;all_stacks;all_reductions}(* Filter reduction lookahead sets to a restricted domain [la].
Preserves sharing when no filtering is needed. *)letrecfilter_reductionsgla=function|[]->[]|r::rsasrrs->letfiltered=reffalseinletr'=IndexMap.filter_map(fun_la'->letla''=Terminal.intersectglala'inifla'!=la''thenfiltered:=true;ifIndexSet.is_emptyla''thenNoneelseSomela'')rinletrs'=filter_reductionsglarsinifrs==rs'&¬!filteredthenrrselser'::rs'(* Reduction targets indexation *)moduleTarget=Unsafe_cardinal()type'gtarget='gTarget.ttype'gtargets=('gtarget,'gterminalindexset)indexmaptype'gtarget_trie={mutablesub:('glr1,'gtarget_trie)indexmap;mutableimmediates:'glr1indexset;mutabletargets:('glr1,'gtargetindex)indexmap;}letindex_targets(typeg)(g:ggrammar)rc:gtarget_trie*(ggoto_transition,gtargets)vector=(* Index sources of goto transitions *)letgoto_sources=Vector.make(Lr1.cardinalg)IndexSet.emptyinIndex.rev_iter(Transition.gotog)beginfungt->lettr=(Transition.of_gotoggt)ingoto_sources.@(Transition.targetgtr)<-IndexSet.addgtend;(* Allocate target identifiers *)letmoduleGen=Gensym()inletopenTarget.Eq(structtypet=gincludeGenend)inletRefl=eqin(* Targets by goto transition *)letby_goto=Vector.make(Transition.gotog)IndexMap.emptyin(* Manage trie nodes *)letfresh_node()={sub=IndexMap.empty;immediates=IndexSet.empty;targets=IndexMap.empty;}inletget_child(node,lr1)=matchIndexMap.find_optlr1node.subwith|Somenode'->node'|None->letnode'=fresh_node()innode.sub<-IndexMap.addlr1node'node.sub;node'inletroot=fresh_node()inroot.immediates<-Lr1.allg;letrecfollow_path=function|[]->assertfalse|[lr1]->(root,lr1)|lr1::path->(get_child(follow_pathpath),lr1)in(* Construct target trie *)Index.rev_iter(Lr1.cardinalg)beginfuntgt->(* For each LR(1), there are three sources of reduction targets:
- stacks directly reachable from this state,
these are marked as "immediate" in the trie
- goto transitions reaching this target (found using the goto_sources)
- composition of both
*)letrecvisit_stacksacc{next;reductions=_}=List.fold_leftbeginfunacc(stack,la,sub')->letacc=(follow_path(List.revstack),la)::accinvisit_stacksaccsub'endaccnextinletroots=visit_stacks[]rc.:(tgt).stacksin(* 1. Register immediates *)List.iter(fun((node,lr1),_)->node.immediates<-IndexSet.addlr1node.immediates)roots;(* Goto sources *)letsources=goto_sources.:(tgt)inifIndexSet.is_not_emptysourcesthen(* Prepend all goto transitions (by construction, rc stacks already end with tgt) *)letroots=(get_child(root,tgt),Terminal.allg)::List.map(fun(root,la)->(get_childroot,la))rootsinList.iterbeginfun(root,la)->IndexSet.iterbeginfungt->letsrc=Transition.sourceg(Transition.of_gotoggt)inletindex=matchIndexMap.find_optsrcroot.targetswith|Someindex->index|None->letindex=Gen.fresh()inroot.targets<-IndexMap.addsrcindexroot.targets;indexinby_goto.@(gt)<-IndexMap.addindexlaendsources;endrootsend;stopwatch2"indexed %d targets"(cardinalGen.n);(* Done *)(root,by_goto)(* Graph construction *)moduleStep=Unsafe_cardinal()type'gstep='gStep.t(* Stream accessor for lazy predecessor lists.
Materializes the stream up to index [i] on demand. *)letget_stream?(initial=0)stream=lets=refstreaminletd=refinitialinfuni->assert(i>=!d);whilei>!ddos:=Lazy.force(!s).lnext;incrd;done;(!s).lvaluetype'gtransition={reached:'gtargetindexset;reachable:'gtargetindexset;step:'gstepindex;}type'ggraph=('gstep,('glr1,'gtransitionlist)indexmap)vectorletmake(typeg)(g:ggrammar)(rc:(g,glr1)reduction_closures)(targets:(ggoto_transition,gtargets)vector):ggraph=letopenIndexBufferinletmoduleCells=Gensym()inletmoduleLinks=Gen.Make()inletcells:(Cells.n,glr1indexset)Dyn.t=Dyn.makeIndexSet.emptyinletopenstructtypelabel=glr1index*gtargetindexset*int*Cells.nindex*Cells.nindex*glr1indexsetendinletlinks:(Links.n,label)Gen.t=Links.get_generator()inlettable=Vector.make(Nonterminal.cardinalg)IndexSet.Map.emptyinletget_cellntla=letmap0=table.:(nt)inmatchIndexSet.Map.find_optlamap0with|Someindex->index|None->letindex=Cells.fresh()intable.:(nt)<-IndexSet.Map.addlaindexmap0;indexinletinitial=Cells.fresh()inletsink=Cells.fresh()inletrecexplore_cellcellntlasrc=letgt=Transition.find_gotogsrcntinletreached=IndexMap.deflatetargets.:(gt)(fun_la'->not(IndexSet.disjointlala'));inletpredecessors=get_stream(Lr1.predecessorsgsrc)inlettgt=Transition.targetg(Transition.of_gotoggt)inexplore_transitionscellsrcreachedlapredecessorsrc.:(tgt).all_reductionsandexplore_transitionscell0srcreachedla0predecessorsreductions=letresult=ref[]inList.iteribeginfundepthgoto->IndexMap.iterbeginfunntla->letla=IndexSet.interla0lainifIndexSet.is_not_emptylathen(letcell=get_cellntlainletstates=predecessorsdepthinletdone_=Dyn.getcellscellinlettodo=IndexSet.diffstatesdone_inpushresult(src,reached,depth,cell0,cell,states);ifIndexSet.is_not_emptytodothen(Dyn.setcellscell(IndexSet.uniontododone_);IndexSet.rev_iter(explore_cellcellntla)todo;));endgotoendreductions;match!resultwith|[]->ignore(Gen.addlinks(src,reached,0,cell0,sink,IndexSet.empty));|result->List.iter(funtr->ignore(Gen.addlinkstr))resultinIndex.iter(Lr1.cardinalg)beginfunlr1->letpredecessors=get_stream~initial:(-1)(Lr1.predecessorsglr1)inexplore_transitionsinitiallr1IndexSet.empty(Terminal.regularg)predecessorsrc.:(lr1).all_reductionsend;stopwatch2"raw redgraph: %d cells, %d links"(cardinalCells.n)(cardinalLinks.n);letmoduleMin=Valmari.Minimize(structtypet=labelletcompare(lr1,targets1,depth1,_src1,_dst1,states1)(lr2,targets2,depth2,_src2,_dst2,states2)=letc=Index.comparelr1lr2inifc<>0thencelseletc=Int.comparedepth1depth2inifc<>0thencelseletc=IndexSet.comparetargets1targets2inifc<>0thencelseletc=IndexSet.comparestates1states2incend)(structtypestates=Cells.nletstates=Cells.ntypetransitions=Links.nlettransitions=Links.nletsourcetr=let(_,_,_,x,_,_)=Gen.getlinkstrinxlettargettr=let(_,_,_,_,x,_)=Gen.getlinkstrinxletlabeltr=Gen.getlinkstrletinitialsf=finitialletfinalsf=Index.iterCells.nfletrefinementsf=f(fun~add->addinitial);f(fun~add->addsink)end)inletinitial=Option.get(Min.transport_stateinitial)inletsink=Option.get(Min.transport_statesink)instopwatch2"minimized redgraph: %d cells, %d links"(cardinalMin.states)(cardinalMin.transitions);letcells_outgoing=Vector.makeMin.statesIndexMap.emptyinletcells_depth=Vector.makeMin.states0inIndex.rev_iterMin.transitionsbeginfuntr->letsource=Min.sourcetrinlettarget=Min.targettrinletlr,_,depth,_,_,_=Min.labeltrincells_outgoing.@(source)<-IndexMap.updatelr(add_updatetr);cells_depth.@(target)<-Int.maxdepthend;stopwatch2"redgraph: indexed transitions";letsuccftr=let(_,_,_,_,_,states)=Min.labeltrinletoutgoing=cells_outgoing.:(Min.targettr)inIndexSet.rev_iter(funsrc->IndexSet.iterf(IndexMap.findsrcoutgoing))statesinletreachable=Vector.initMin.transitions(funtr->letacc=refIndexSet.emptyinsucc(funtr'->let(_,targets,_,_,_,_)=Min.labeltr'inacc:=IndexSet.uniontargets!acc)tr;!acc)inTarjan.close_relationsuccreachable;stopwatch2"redgraph: reachability closure";letmoduleSteps=Step.Const(structtypet=gletcardinal=Vector.fold_left(+)(Vector.length_as_intcells_depth-1)cells_depthlet()=stopwatch2"redgraph: %d steps"cardinalend)inletenum=Index.enumerateSteps.ninletstep_zero=enum()inletcells_steps=Vector.mapi(funcelldepth->ifcell=initial||cell=sinkthenstep_zeroelse(for_=0todepth-1doignore(enum())done;enum()))cells_depthinletsteps=Vector.makeSteps.nIndexMap.emptyinVector.rev_iteribeginfuncellstep->steps.:(step)<-IndexMap.mapbeginfuntrs->List.map(funtr->let(_,reached,depth,_,_,_)=Min.labeltrinletreachable=reachable.:(tr)inlettarget=cells_steps.:(Min.targettr)inletstep=Index.of_intSteps.n(Index.to_inttarget-depth)in{reached;reachable;step})(IndexSet.elementstrs)endcells_outgoing.:(cell)endcells_steps;stepstype'gaction=|Advanceof'gstepindex|Switchof('glr1,'gtransitionlist)indexmap(* Get initial transitions for a given LR(1) state from the graph entry point. *)letinitial(typeg)(gr:ggraph)(lr1:glr1index)=matchIndexMap.find_optlr1(Vector.as_arraygr).(0)with|None->[]|Somel->l(* Follow a step in the reduction graph.
Step 0 returns an empty switch; empty maps advance; non-empty maps switch. *)letfollowgrstep=match(step:_index:>int)with|0->SwitchIndexMap.empty|step'->letmap=gr.:(step)inifIndexMap.is_emptymapthenAdvance(Index.of_int(Vector.lengthgr)(step'+1))elseSwitchmap