12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064(* MIT License
*
* Copyright (c) 2025 Frédéric Bour
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*)(** Grammar information and index management
This module defines comprehensive data structures for representing grammars
and provides indexed representations of all grammar elements (terminals,
non-terminals, productions, LR states, items, etc.).
Design principles:
- The module uses type-level index cardinalities to ensure type-safe
access to grammar structures.
- All data structures are vector-based for efficient random access.
- The module extends Menhir's grammar representation with additional
convenience functions and derived information.
Key data structures:
- Grammar: Contains all grammar information:
- [terminal_*, nonterminal_*]: Sets and tables for terminals and
non-terminals
- [production_*]: Productions with LHS and RHS information
- [item_*]: LR(0) items derived from productions
- [lr0_*], [lr1_*]: LR(0) and LR(1) states
- [transition_*]: Transitions between states (shift and goto)
- [reduction_*]: Reductions available at each LR state
- Indexing:
- Each grammar element is assigned a unique index
- Index vectors enable O(1) lookup of properties
- The [Load_grammar] functor computes all the index mappings from
Menhir's grammar representation
Tricky implementation details:
- The [Item] module computes offsets for items based on production
lengths, enabling efficient conversion between production+position
and item indices.
- The [Transition] module separately tracks goto and shift transitions,
with [goto_table] enabling efficient lookup of goto transitions by
(state, nonterminal) pair.
- The [Reduction] module groups reductions by (state, production) pairs
with their lookahead sets, supporting efficient lookup of applicable
reductions.
- The [Symbol] module provides both terminal/nonterminal projections and
the combined symbol type for representing grammar symbols.
- The [find] functions with [approx] support fuzzy matching and provide
helpful error messages with suggestions when symbols are not found.
*)openUtilsopenMiscopenFix.IndexingmoduletypeGRAMMAR=MenhirSdk.Cmly_api.GRAMMARmoduleUC_terminal=Unsafe_cardinal()moduleUC_nonterminal=Unsafe_cardinal()moduleUC_production=Unsafe_cardinal()moduleUC_lr0=Unsafe_cardinal()moduleUC_lr1=Unsafe_cardinal()moduleUC_item=Unsafe_cardinal()moduleUC_goto_transition=Unsafe_cardinal()moduleUC_shift_transition=Unsafe_cardinal()moduleUC_reduction=Unsafe_cardinal()type'gterminal='gUC_terminal.ttype'gnonterminal='gUC_nonterminal.ttype'gsymbol=('gterminal,'gnonterminal)Sum.ntype'gproduction='gUC_production.ttype'gitem='gUC_item.ttype'glr0='gUC_lr0.ttype'glr1='gUC_lr1.ttype'ggoto_transition='gUC_goto_transition.ttype'gshift_transition='gUC_shift_transition.ttype'gtransition=('ggoto_transition,'gshift_transition)Sum.ntype'greduction='gUC_reduction.ttype'ggrammar={raw:(moduleMenhirSdk.Cmly_api.GRAMMAR);terminal_n:'gterminalcardinal;terminal_all:'gterminalindexset;terminal_regular:'gterminalindexset;terminal_table:(string,'gterminalindex)Hashtbl.t;terminal_aliases:(string,string)Hashtbl.tlazy_t;nonterminal_n:'gnonterminalcardinal;nonterminal_all:'gnonterminalindexset;nonterminal_table:(string,'gnonterminalindex)Hashtbl.t;symbol_all:'gsymbolindexset;production_lhs:('gproduction,'gnonterminalindex)vector;production_rhs:('gproduction,'gsymbolindexarray)vector;production_all:'gproductionindexset;item_productions:('gitem,'gproductionindex)vector;item_offsets:('gproduction,int)vector;lr0_items:('glr0,'gitemindexset)vector;lr0_incoming:('glr0,'gsymbolindexoption)vector;lr0_is_entrypoint:('glr0,'gproductionindexoption)vector;transition_source:('gtransition,'glr1index)vector;transition_target:('gtransition,'glr1index)vector;transition_shift_sym:('gshift_transition,'gterminalindex)vector;(*transition_shift_table: ('g lr1, ('g terminal, 'g shift_transition index) indexmap) vector;*)transition_goto_sym:('ggoto_transition,'gnonterminalindex)vector;transition_goto_table:('glr1,('gnonterminal,'ggoto_transitionindex)indexmap)vector;transition_predecessors:('glr1,'gtransitionindexset)vector;transition_successors:('glr1,'gtransitionindexset)vector;transition_accepting:'ggoto_transitionindexset;lr1_all:'glr1indexset;lr1_lr0:('glr1,'glr0index)vector;lr1_wait:'glr1indexset;lr1_accepting:'glr1indexset;lr1_reduce_on:('glr1,'gterminalindexset)vector;lr1_shift_on:('glr1,'gterminalindexset)vector;lr1_reject:('glr1,'gterminalindexset)vector;lr1_entrypoints:'glr1indexset;lr1_entrypoint_table:(string,'glr1index)Hashtbl.t;lr1_predecessors:('glr1,'glr1indexsetlazy_stream)vector;reduction_state:('greduction,'glr1index)vector;reduction_production:('greduction,'gproductionindex)vector;reduction_lookaheads:('greduction,'gterminalindexset)vector;reduction_from_lr1:('glr1,'greductionindexset)vector;}letrawg=g.rawmoduleLoad_grammar(G:MenhirSdk.Cmly_api.GRAMMAR)=structtypegmoduleImport(UC:UNSAFE_CARDINAL)(M:sigtypetvalcount:intvalof_int:int->tvalto_int:t->intend)=structincludeUC.Const(structtypet=gletcardinal=M.countend)letof_gi=Index.of_intn(M.to_inti)letto_gi=M.of_int(Index.to_inti)letall=IndexSet.allnendmoduleTerminal=structincludeImport(UC_terminal)(G.Terminal)letregular=IndexSet.init_from_setn(funt->matchG.Terminal.kind(G.Terminal.of_int(t:_index:>int))with|`EOF|`REGULAR->true|`PSEUDO|`ERROR->false)letaliases=lazy(letb=Buffer.create32inletunescapes=letlength=String.lengthsiniflength=0||s.[0]<>'"'thenselseletlength=ifs.[length-1]='"'thenlength-1elselengthinleti=ref1inBuffer.clearb;while!i<lengthdomatchs.[!i]with|'\\'->if!i+1<lengththenbeginmatchs.[!i+1]with|'"'|'\\'asc->Buffer.add_charbc|'n'->Buffer.add_charb'\n'|'r'->Buffer.add_charb'\r'|'t'->Buffer.add_charb'\t'|c->Buffer.add_charbcend;i:=!i+2|c->Buffer.add_charbc;incridone;Buffer.contentsbinlettable=Hashtbl.create7inletopenG.SurfaceinList.iterbeginfun(name,token)->matchToken.aliastokenwith|Somealias->Hashtbl.addtablename(unescapealias)|None->()end(Syntax.tokensG.Surface.before_inlining);table)endmoduleNonterminal=Import(UC_nonterminal)(G.Nonterminal)moduleSymbol=structletn=Sum.cardinalTerminal.nNonterminal.nletall=IndexSet.allnletof_g=function|G.Tt->Sum.inj_l(Terminal.of_gt)|G.Nn->Sum.inj_rTerminal.n(Nonterminal.of_gn)(*let to_g t = match Sum.prj Terminal.n t with
| L t -> G.T (Terminal.to_g t)
| R n -> G.N (Nonterminal.to_g n)*)endmoduleProduction=structincludeImport(UC_production)(G.Production)letlhs=Vector.initn(funp->Nonterminal.of_g(G.Production.lhs(to_gp)))letrhs=Vector.initn@@funp->Array.map(fun(sym,_,_)->Symbol.of_gsym)(G.Production.rhs(to_gp))endmoduleItem=structletcount=ref0letoffsets=Vector.initProduction.n(funprod->letposition=!countincount:=!count+Array.lengthProduction.rhs.:(prod)+1;position)includeUC_item.Const(structtypet=gletcardinal=!countend)letproductions=Vector.make'n(fun()->Index.of_intProduction.n0)let()=letenum=Index.enumerateninIndex.iterProduction.n@@funprod->for_=0toArray.lengthProduction.rhs.:(prod)doproductions.:(enum())<-proddoneendmoduleLr0=structincludeImport(UC_lr0)(G.Lr0)letitems=Vector.initn@@funlr0->to_glr0|>G.Lr0.items|>List.map(fun(p,pos)->Index.of_intItem.n(Item.offsets.:(Production.of_gp)+pos))|>IndexSet.of_listletincoming=Vector.initn@@funlr0->to_glr0|>G.Lr0.incoming|>Option.mapSymbol.of_gletis_entrypoint=Vector.map(funitems->ifnot(IndexSet.is_singletonitems)thenNoneelseletitem=IndexSet.chooseitemsinletprod=Item.productions.:(item)inifIndex.to_intitem=Item.offsets.:(prod)thenSomeprodelseNone)itemsendmoduleLr1=structincludeImport(UC_lr1)(G.Lr1)letlr0=Vector.initn@@funlr1->Lr0.of_g(G.Lr1.lr0(to_glr1))endmoduleTransition=structletshift_count,goto_count=letshift_count=ref0inletgoto_count=ref0in(* Count goto and shift transitions by iterating on all states and
transitions *)G.Lr1.iterbeginfunlr1->List.iterbeginfun(sym,_)->matchsymwith|G.T_->incrshift_count|G.N_->incrgoto_countend(G.Lr1.transitionslr1)end;(!shift_count,!goto_count)moduleGoto=UC_goto_transition.Const(structtypet=gletcardinal=goto_countend)moduleShift=UC_shift_transition.Const(structtypet=gletcardinal=shift_countend)letany=Sum.cardinalGoto.nShift.nletof_goto=Sum.inj_lletof_shift=Sum.inj_rGoto.n(* Vectors to store information on states and transitions.
We allocate a bunch of data structures (sources, targets, t_symbols,
nt_symbols and predecessors vectors, t_table and nt_table hash tables),
and then populate them by iterating over all transitions.
*)letsources=Vector.make'any(fun()->Index.of_intLr1.n0)lettargets=Vector.make'any(fun()->Index.of_intLr1.n0)letshift_sym=Vector.make'Shift.n(fun()->Index.of_intTerminal.n0)letgoto_sym=Vector.make'Goto.n(fun()->Index.of_intNonterminal.n0)(* Tables to associate a pair of a state and a symbol to a transition. *)letgoto_table=Vector.makeLr1.nIndexMap.empty(*let shift_table = Vector.make Lr1.n IndexMap.empty*)(* A vector to store the predecessors of an lr1 state.
We cannot compute them directly, we discover them by exploring the
successor relation below. *)letpredecessors=Vector.makeLr1.nIndexSet.emptyletsuccessors=(* We populate all the data structures allocated above, i.e.
the vectors t_sources, t_symbols, t_targets, nt_sources, nt_symbols,
nt_targets and predecessors, as well as the tables t_table and
nt_table, by iterating over all successors. *)letnext_goto=Index.enumerateGoto.ninletnext_shift=Index.enumerateShift.ninVector.initLr1.nbeginfunsource->List.fold_rightbeginfun(sym,target)acc->lettarget=Lr1.of_gtargetinletindex=matchsymwith|G.Tt->lett=Terminal.of_gtinletindex=next_shift()inshift_sym.:(index)<-t;(*shift_table.@(source) <- IndexMap.add t index;*)of_shiftindex|G.Nnt->letnt=Nonterminal.of_gntinletindex=next_goto()ingoto_sym.:(index)<-nt;goto_table.@(source)<-IndexMap.addntindex;of_gotoindexinsources.:(index)<-source;targets.:(index)<-target;predecessors.@(target)<-IndexSet.addindex;IndexSet.addindexaccend(G.Lr1.transitions(Lr1.to_gsource))IndexSet.emptyendletaccepting=letacc=refIndexSet.emptyinIndex.rev_iterLr1.nbeginfunlr1->matchLr0.is_entrypoint.:(Lr1.lr0.:(lr1))with|None->()|Someprod->letsym=matchSum.prjTerminal.nProduction.rhs.:(prod).(0)with|L_->assertfalse|Rnt->ntinacc:=IndexSet.fold_right(funacctr->matchSum.prjGoto.ntrwith|Lgtwhengoto_sym.:(gt)=sym->IndexSet.addgtacc|_->acc)!accsuccessors.:(lr1)end;!accendmoduleLr1_extra=structopenLr1letaccepting=refIndexSet.empty(** The set of terminals that will trigger a reduction *)letreduce_on=Vector.initn@@funlr1->List.fold_left(funacc(t,_)->ifG.Terminal.kindt=`PSEUDOthenaccepting:=IndexSet.addlr1!accepting;IndexSet.add(Terminal.of_gt)acc)IndexSet.empty(G.Lr1.get_reductions(to_glr1))letaccepting=!accepting(** The set of terminals that will trigger a shift transition *)letshift_on=Vector.initn@@funlr1->List.fold_left(funacc(sym,_raw)->matchsymwith|G.Tt->IndexSet.add(Terminal.of_gt)acc|G.N_->acc)IndexSet.empty(G.Lr1.transitions(to_glr1))(** The set of terminals the state has no transition for *)letreject=Vector.initn@@funlr1->letresult=Terminal.allinletresult=IndexSet.diffresultreduce_on.:(lr1)inletresult=IndexSet.diffresultshift_on.:(lr1)inresultletwait=IndexSet.init_from_setn(funlr1->matchG.Lr0.incoming(Lr0.to_glr0.:(lr1))with|Some(G.N_)->false|Some(G.Tt)->G.Terminal.kindt=`REGULAR&¬(IndexSet.memlr1accepting)|None->true)letpredecessors=Vector.initn@@funlr1->IndexSet.map(funtr->Transition.sources.:(tr))Transition.predecessors.:(lr1)letentrypoints,entrypoint_table=letset=refIndexSet.emptyinlettable=Hashtbl.create7inIndex.rev_itern(funlr1->matchLr0.is_entrypoint.:(lr0.:(lr1))with|None->()|Someprod->set:=IndexSet.addlr1!set;letsym,_,_=(G.Production.rhs(Production.to_gprod)).(0)inHashtbl.addtable(G.Symbol.namesym)lr1);(!set,table)endmoduleReduction=structletn=ref0letraw=letimport_redreds=reds|>List.filter_map(fun(t,p)->matchG.Production.kindpwith|`START->None|`REGULAR->Some(Production.of_gp,Terminal.of_gt))|>Misc.group_by~compare:(fun(p1,_)(p2,_)->compare_indexp1p2)~group:(fun(p,t)ps->p,IndexSet.of_list(t::List.mapsndps))|>List.sort(fun(p1,_)(p2,_)->letl1=Array.lengthProduction.rhs.:(p1)inletl2=Array.lengthProduction.rhs.:(p2)inletc=Int.comparel1l2inifc<>0thencelsecompare_indexProduction.lhs.:(p1)Production.lhs.:(p2))inletimport_lr1lr1=letreds=import_red(G.Lr1.get_reductions(Lr1.to_glr1))inn:=!n+List.lengthreds;redsinVector.initLr1.nimport_lr1includeUC_reduction.Const(structtypet=gletcardinal=!nend)letstate=Vector.make'n(fun()->Index.of_intLr1.n0)letproduction=Vector.make'n(fun()->Index.of_intProduction.n0)letlookaheads=Vector.makenIndexSet.emptyletfrom_lr1=letenum=Index.enumerateninVector.mapi(funlr1reds->List.fold_left(funset(prod,la)->leti=enum()instate.:(i)<-lr1;production.:(i)<-prod;lookaheads.:(i)<-la;IndexSet.addiset)IndexSet.emptyreds)rawendletgrammar={raw=(moduleG);terminal_n=Terminal.n;terminal_all=Terminal.all;terminal_regular=Terminal.regular;terminal_table=Hashtbl.create7;terminal_aliases=Terminal.aliases;nonterminal_n=Nonterminal.n;nonterminal_all=Nonterminal.all;nonterminal_table=Hashtbl.create7;symbol_all=Symbol.all;production_lhs=Production.lhs;production_rhs=Production.rhs;production_all=Production.all;item_productions=Item.productions;item_offsets=Item.offsets;lr0_items=Lr0.items;lr0_incoming=Lr0.incoming;lr0_is_entrypoint=Lr0.is_entrypoint;transition_source=Transition.sources;transition_target=Transition.targets;transition_shift_sym=Transition.shift_sym;(*transition_shift_table = Transition.shift_table;*)transition_goto_sym=Transition.goto_sym;transition_goto_table=Transition.goto_table;transition_predecessors=Transition.predecessors;transition_successors=Transition.successors;transition_accepting=Transition.accepting;lr1_all=Lr1.all;lr1_lr0=Lr1.lr0;lr1_wait=Lr1_extra.wait;lr1_accepting=Lr1_extra.accepting;lr1_reduce_on=Lr1_extra.reduce_on;lr1_shift_on=Lr1_extra.shift_on;lr1_reject=Lr1_extra.reject;lr1_entrypoints=Lr1_extra.entrypoints;lr1_entrypoint_table=Lr1_extra.entrypoint_table;lr1_predecessors=iterate_vectorLr1_extra.predecessors;reduction_state=Reduction.state;reduction_production=Reduction.production;reduction_lookaheads=Reduction.lookaheads;reduction_from_lr1=Reduction.from_lr1;}endmoduletypeINDEXED=sigtype'gnvalcardinal:'ggrammar->'gncardinalvalof_int:'ggrammar->int->'gnindexendmoduleTerminal=structtype'gn='gterminalletcardinalg=g.terminal_nletof_intgi=Index.of_int(cardinalg)i(** Converts a terminal index to its string representation *)letto_stringgi=letopen(valg.raw)inTerminal.name(Terminal.of_int(Index.to_inti))(** Returns the alias for a terminal, if one exists *)letaliasgi=Hashtbl.find_opt(Lazy.forceg.terminal_aliases)(to_stringgi)(** Returns the set of all terminals *)letallg=g.terminal_all(** Returns the set of regular terminals, excluding EOF, ERROR, and pseudo-terminals *)letregularg=g.terminal_regular(** Returns the semantic value type of a terminal *)letsemantic_valuegi=letopen(valg.raw)inTerminal.typ(Terminal.of_int(Index.to_inti))(** Optimized intersection: short-circuits when either argument is [all] *)letintersectgab=ifa==g.terminal_allthenbelseifb==g.terminal_allthenaelseIndexSet.interab(** Returns [true] if the terminal is the special ERROR symbol *)letis_errorgi=letopen(valg.raw)inmatchTerminal.kind(Terminal.of_int(i:_index:>int))with|`ERROR->true|_->false(** Converts a set of lookahead terminals to a human-readable string.
Sets larger than 10 elements are abbreviated as "<n lookaheads>" *)letlookaheads_to_stringgla=matchIndexSet.cardinallawith|nwhenn>10->Printf.sprintf"<%d lookaheads>"n|_->string_concat_map~wrap:("<",">")","(to_stringg)(IndexSet.elementsla)(** Lazily builds and returns the terminal name-to-index lookup table *)letterminal_tableg=ifHashtbl.lengthg.terminal_table=0thenIndex.iter(cardinalg)(funt->Hashtbl.addg.terminal_table(to_stringgt)t);g.terminal_table(** Finds a terminal by name. With [approx > 0], returns fuzzy match
suggestions when the exact name is not found. *)letfindg?(approx=3)name=lettable=terminal_tableginmatchHashtbl.find_opttablename,approxwith|Somet,_->Result.Okt|None,0->Result.Error[]|None,dist->Result.Error(Damerau_levenshtein.filter_approx~distname(Hashtbl.to_seqtable))endmoduleNonterminal=structtype'gn='gnonterminalletcardinalg=g.nonterminal_nletof_intgi=Index.of_int(cardinalg)i(** Returns the set of all non-terminals *)letallg=g.nonterminal_all(** Converts a nonterminal index to its name string *)letto_stringgi=letopen(valg.raw)inNonterminal.name(Nonterminal.of_int(Index.to_inti))(** Converts a nonterminal index to its mangled name (used internally by Menhir) *)letto_mangled_stringgi=letopen(valg.raw)inNonterminal.mangled_name(Nonterminal.of_int(Index.to_inti))(** Finds a nonterminal by its mangled name. Linear search, not cached. *)letfind_mangledgstr=letenum=Index.enumerate(cardinalg)inletrecloop()=leti=enum()inifto_mangled_stringgi=strthenielseloop()inmatchloop()with|i->Somei|exceptionIndex.End_of_set->None(** Returns [`REGULAR] for ordinary non-terminals and [`START] for entrypoint non-terminals *)letkindgi=letopen(valg.raw)inNonterminal.kind(Nonterminal.of_int(Index.to_inti))(** Returns the semantic value type of a nonterminal *)letsemantic_valuegi=letopen(valg.raw)inNonterminal.typ(Nonterminal.of_int(Index.to_inti))(** Returns [true] if the nonterminal can derive the empty string *)letnullablegi=letopen(valg.raw)inNonterminal.nullable(Nonterminal.of_int(Index.to_inti))(** Returns the FIRST set of a nonterminal: the set of terminals that can begin
a string derived from this nonterminal *)letfirstgi=letopen(valg.raw)inNonterminal.of_int(Index.to_inti)|>Nonterminal.first|>List.map(funt->Index.of_intg.terminal_n(Terminal.to_intt))|>IndexSet.of_list(** Lazily builds and returns the nonterminal name-to-index lookup table *)letnonterminal_tableg=ifHashtbl.lengthg.nonterminal_table=0thenIndex.iter(cardinalg)(funt->Hashtbl.addg.nonterminal_table(to_stringgt)t);g.nonterminal_table(** Finds a nonterminal by name. Checks both regular and mangled names.
With [approx > 0], returns fuzzy match suggestions on failure. *)letfindg?(approx=3)name=lettable=nonterminal_tableginmatchHashtbl.find_opttablename,approxwith|Somet,_->Result.Okt|None,0->Result.Error(`Dym[])|None,dist->matchfind_mangledgnamewith|Somei->Result.Error(`Mangledi)|None->letcandidates=Damerau_levenshtein.filter_approx~distname(Hashtbl.to_seqtable)inResult.Error(`Dymcandidates)endmoduleSymbol=structtype'gn='gsymbolletcardinalg=Sum.cardinalg.terminal_ng.nonterminal_nletof_intgi=Index.of_int(cardinalg)i(** Discriminated union of terminal and nonterminal indices *)type'gdesc=|Tof'gterminalindex|Nof'gnonterminalindex(** Internal projection of symbol index into terminal/nonterminal sum *)letprjgi=Sum.prjg.terminal_ni(** Returns the symbol as a discriminated union: [T] for terminals, [N] for non-terminals *)letdescgi=matchprjgiwith|Lt->Tt|Rn->Nn(** Returns [true] if the symbol is a terminal *)letis_terminalgt=matchprjgtwith|L_->true|R_->false(** Returns [true] if the symbol is a non-terminal *)letis_nonterminalgt=matchprjgtwith|L_->false|R_->true(** Converts a symbol to its name string. With [mangled:true], returns Menhir's internal name. *)letto_stringg?mangledt=letopen(valg.raw)inmatchprjgtwith|Lt->symbol_name?mangled(T(Terminal.of_int(Index.to_intt)))|Rn->symbol_name?mangled(N(Nonterminal.of_int(Index.to_intn)))(** Returns the semantic value type of a symbol. For terminals without a semantic
value, returns [Some "unit"]. *)letsemantic_valuegt=matchprjgtwith|Lt->Some(Option.value(Terminal.semantic_valuegt)~default:"unit")|Rn->Nonterminal.semantic_valuegn(** Returns the set of all symbols (terminals and non-terminals) *)letallg=g.symbol_all(** Inject a terminal index into the symbol index space *)letinj_t_t=Sum.inj_lt(** Inject a nonterminal index into the symbol index space *)letinj_ngn=Sum.inj_rg.terminal_nn(** Finds a symbol (terminal or nonterminal) by name. Checks both regular and
mangled names. With [approx > 0], returns fuzzy match suggestions on failure. *)letfindg?(approx=3)name=letttable=Terminal.terminal_tableginmatchHashtbl.find_optttablenamewith|Somet->Result.Ok(inj_tgt)|None->letntable=Nonterminal.nonterminal_tableginmatchHashtbl.find_optntablename,approxwith|Somen,_->Result.Ok(inj_ngn)|None,0->Result.Error(`Dym[])|None,dist->matchNonterminal.find_mangledgnamewith|Somei->Result.Error(`Mangledi)|None->letcandidates=Damerau_levenshtein.filter_approx~distname(Seq.append(Seq.map(fun(s,t)->(s,inj_tgt))(Hashtbl.to_seqttable))(Seq.map(fun(s,n)->(s,inj_ngn))(Hashtbl.to_seqntable)))inResult.Error(`Dymcandidates)endmoduleProduction=structtype'gn='gproductionletcardinalg=Vector.lengthg.production_lhsletof_intgi=Index.of_int(cardinalg)i(** Returns the left-hand side nonterminal of a production *)letlhsgi=g.production_lhs.:(i)(** Returns the right-hand side symbols of a production *)letrhsgi=g.production_rhs.:(i)(** Returns the number of symbols on the right-hand side *)letlengthgi=Array.length(rhsgi)(** Returns [`REGULAR] for ordinary productions and [`START] for pseudo start productions *)letkindgi=letopen(valg.raw)inProduction.kind(Production.of_int(Index.to_inti))(** Returns the set of all productions *)letallg=g.production_allend(** Explicit representation of LR(0) items.
An item is a production with a dot position: [A -> α . β].
Items are indexed globally across all productions for efficient set operations. *)moduleItem=structtype'gn='gitemletcardinalg=Vector.lengthg.item_productionsletof_intgi=Index.of_int(cardinalg)i(** [make g prod pos] creates an item for production [prod] with the dot
at position [pos]. Raises [Invalid_argument] if [pos] is out of bounds. *)letmakegprodpos=ifpos<0||pos>Production.lengthgprodtheninvalid_arg"Info.Item.make: pos out of bounds";Index.of_int(cardinalg)(g.item_offsets.:(prod)+pos)(** Creates an item with the dot at the end of the production (fully recognized) *)letlastgprod=makegprod(Production.lengthgprod)(** Returns the production that this item belongs to *)letproductiongi=g.item_productions.:(i)(** Returns the dot position within the item's production (0 = before all symbols) *)letpositiongi=((i:_index:>int)-g.item_offsets.:(productiongi))(** Returns the (production, position) pair for the item *)letdescgi=letprod=productiongiin(prod,(i:_index:>int)-g.item_offsets.:(prod))(** Returns the previous item in the same production (dot moved one position left),
or [None] if the dot is already at position 0. *)letprevg(i:'gnindex)=matchIndex.prediwith|Somejwhennot(Index.equal(productiongi)(productiongj))->None|result->result(** Returns [true] if the dot is at the end of the production (ready to reduce) *)letis_reduciblegi=letprod=productiongiin((i:_index:>int)-g.item_offsets.:(prod))=Production.lengthgprod(** Converts an item to standard notation string, e.g. "A:B . c d" *)letto_stringgi=letprod,pos=descgiinletb=Buffer.create63inBuffer.add_stringb(Nonterminal.to_stringg(Production.lhsgprod));Buffer.add_charb':';letrhs=Production.rhsgprodinletadd_symsym=Buffer.add_charb' ';Buffer.add_stringb(Symbol.to_stringgsym);infori=0topos-1doadd_symrhs.(i)done;Buffer.add_stringb" .";fori=postoArray.lengthrhs-1doadd_symrhs.(i)done;Buffer.contentsbend(** LR(0) state information.
LR(0) states represent the "core" of LR(1) states, ignoring lookahead information. *)moduleLr0=structtype'gn='glr0letcardinalg=Vector.lengthg.lr0_itemsletof_intgi=Index.of_int(cardinalg)i(** Returns the symbol that labels the transition into this state.
[None] for initial states. *)letincominggi=g.lr0_incoming.:(i)(** Returns the set of LR(0) items in this state (kernel items before closure) *)letitemsgi=g.lr0_items.:(i)(** Returns [Some prod] if this is an initial state for an entrypoint,
where [prod] is the pseudo start production. [None] otherwise. *)letis_entrypointgi=g.lr0_is_entrypoint.:(i)end(** LR(1) state information.
LR(1) states extend LR(0) cores with lookahead information. *)moduleLr1=structtype'gn='glr1letcardinalg=Vector.lengthg.lr1_reduce_onletof_intgi=Index.of_int(cardinalg)i(** Returns the set of all LR(1) states *)letallg=g.lr1_all(** Returns the set of accepting states (reached after recognizing an entrypoint) *)letacceptingg=g.lr1_accepting(** Returns the set of "wait" states: states where the parser must read more input.
Includes initial states and shift transition targets, excluding accepting states. *)letwaitg=g.lr1_wait(** Returns the LR(0) "core" state corresponding to this LR(1) state *)letto_lr0gi=g.lr1_lr0.:(i)(** Returns the symbol labeling the incoming transition. [None] for initial states. *)letincominggi=Lr0.incomingg(to_lr0gi)(** Returns the kernel items of the state (before closure) *)letitemsgi=Lr0.itemsg(to_lr0gi)(** Returns [Some prod] if this is an entrypoint state, [None] otherwise *)letis_entrypointgi=Lr0.is_entrypointg(to_lr0gi)(** Hash table mapping entrypoint names to their LR(1) states *)letentrypoint_tableg=g.lr1_entrypoint_table(** Returns the set of entrypoint states *)letentrypointsg=g.lr1_entrypoints(** Debug printing functions. Formats are not stable across versions. *)(** Converts the incoming symbol of a state to a debug string *)letsymbol_to_stringglr1=matchincomingglr1with|Somesym->Symbol.to_stringgsym|None->letentrypoint=Option.get(is_entrypointglr1)in(Symbol.to_stringg(Production.rhsgentrypoint).(0)^":")(** Converts an LR(1) state to a debug string *)letto_stringglr1=string_of_indexlr1^":"^symbol_to_stringglr1(** Converts a list of LR(1) states to a debug string *)letlist_to_stringglr1s=string_concat_map~wrap:("[","]")"; "(to_stringg)lr1s(** Converts a set of LR(1) states to a debug string *)letset_to_stringglr1s=string_concat_map~wrap:("{","}")", "(to_stringg)(IndexSet.elementslr1s)(** Returns the set of terminals that state [i] can shift on *)letshift_ongi=g.lr1_shift_on.:(i)(** Returns the set of terminals that trigger a reduction in state [i] *)letreduce_ongi=g.lr1_reduce_on.:(i)(** Returns the set of terminals that cause a syntax error in state [i] *)letrejectgi=g.lr1_reject.:(i)(** Returns the lazy stream of predecessor states (states with transitions to [i]) *)letpredecessorsgi=g.lr1_predecessors.:(i)(** Optimized intersection: short-circuits when either argument is [all] *)letintersectgab=ifa==g.lr1_allthenbelseifb==g.lr1_allthenaelseIndexSet.interab(** Returns the default reduction for the state, if any. Some states have a single
applicable reduction that can be taken without checking the lookahead. *)letdefault_reductiongi=letopen(valg.raw)inmatchLr1.default_reduction(Lr1.of_int(i:_index:>int))with|None->None|Somep->Some(Index.of_int(Vector.lengthg.production_rhs)(Production.to_intp))end(** Reduction information.
A reduction is a (state, production, lookahead set) triple, meaning that
in the given state, when the lookahead terminal is in the set, the parser
should reduce by the given production. *)moduleReduction=structtype'gn='greductionletcardinalg=Vector.lengthg.reduction_productionletof_intgi=Index.of_int(cardinalg)i(** Returns the LR(1) state where this reduction applies *)letstategi=g.reduction_state.:(i)(** Returns the production that this reduction reduces by *)letproductiongi=g.reduction_production.:(i)(** Returns the set of lookahead terminals that trigger this reduction *)letlookaheadsgi=g.reduction_lookaheads.:(i)(** Returns the set of all reductions applicable in the given LR(1) state *)letfrom_lr1glr1=g.reduction_from_lr1.:(lr1)endmoduleTransition=struct(** Returns the cardinality of goto transitions *)letgotog=Vector.lengthg.transition_goto_sym(** Returns the cardinality of all transitions (goto + shift) *)letanyg=Vector.lengthg.transition_source(** Returns the cardinality of shift transitions *)letshiftg=Vector.lengthg.transition_shift_sym(** Inject a goto transition index into the combined transition index space *)letof_goto_gi=Sum.inj_li(** Inject a shift transition index into the combined transition index space *)letof_shiftgi=Sum.inj_r(gotog)i(** Project a transition index into either a goto or shift transition index *)letsplitgi=Sum.prj(gotog)i(** [find_goto s nt] finds the goto transition from state [s] labelled by
nonterminal [nt]. Raises [Invalid_argument] if no such transition exists. *)letfind_gotoglr1nt=matchIndexMap.find_optntg.transition_goto_table.:(lr1)with|Somegt->gt|None->Printf.ksprintfinvalid_arg"find_goto(%s, %s)"(Lr1.to_stringglr1)(Nonterminal.to_stringgnt)(** Returns the target state of the goto transition from [lr1] labelled by [nt] *)letfind_goto_targetglr1nt=g.transition_target.:(of_gotog(find_gotoglr1nt))(** Returns the source (origin) state of a transition *)letsourcegi=g.transition_source.:(i)(** Returns the target (destination) state of a transition *)lettargetgi=g.transition_target.:(i)(** Returns the grammar symbol that labels a transition *)letsymbolgi=matchsplitgiwith|Li->Sum.inj_rg.terminal_ng.transition_goto_sym.:(i)|Ri->Sum.inj_lg.transition_shift_sym.:(i)(** Returns the nonterminal that labels a goto transition *)letgoto_symbolgi=g.transition_goto_sym.:(i)(** Returns the terminal that labels a shift transition *)letshift_symbolgi=g.transition_shift_sym.:(i)(** Returns the set of outgoing transitions from state [i] *)letsuccessorsgi=g.transition_successors.:(i)(** Returns the set of incoming transitions to state [i] *)letpredecessorsgi=g.transition_predecessors.:(i)(** Returns the set of accepting transitions: goto transitions from initial
to accepting states, recognizing completion of a grammar entrypoint. *)letacceptingg=g.transition_accepting(** Converts a transition to a debug string of the form "source -> target" *)letto_stringgtr=Printf.sprintf"%s -> %s"(Lr1.to_stringg(sourcegtr))(Lr1.to_stringg(targetgtr))(** [find g src tgt] finds the transition from [src] to [tgt], if one exists.
Returns the transition index, assuming at most one transition between any pair. *)letfindgsrctgt=letinter=IndexSet.inter(successorsgsrc)(predecessorsgtgt)inassert(IndexSet.is_emptyinter||IndexSet.is_singletoninter);IndexSet.minimuminterend