123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158(* MIT License
*
* Copyright (c) 2025 Frédéric Bour
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
*
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*)(** Compact representation of reduction positions
This module provides an efficient compact representation for positions in
reductions of the form (n, A) where:
- n is the number of stack elements to pop
- A is the nonterminal symbol for the goto transition
The position is interpreted as: "pop n elements from the stack before
following the goto transition labeled A".
Design and implementation:
- The table structure uses a contiguous memory layout where positions for
each nonterminal are allocated consecutively. This enables O(1) access
and efficient cache usage.
- [inj] converts a (nonterminal, position) pair to an index into the
table.
- [prj] converts an index back to (nonterminal, position).
- [previous] returns whether the position is at the start of a reduction
(Left nt means we're at the start and need to follow goto nt) or in the
middle of a reduction (Right pos gives the previous position).
- [is_zero] checks if we're at the start of a reduction (position 0).
Tricky implementation details:
- The zero position for each nonterminal is stored separately to enable
efficient lookups when we need to start a reduction.
- Index arithmetic is used for compactness: positions are offsets from
the zero position of the associated nonterminal.
- The [previous'] function handles None (for Optional type) in addition
to the regular case, enabling use with optional positions.
*)openUtilsopenMiscopenFix.IndexingopenInfo(*
Compact representation of a position in a reduction
(a pair `(n, A)` interpreted as `pop n elements before
following the goto transition labelled `A`).
*)includeUnsafe_cardinal()(** A position as a (nonterminal, offset) pair.
The offset is the number of stack elements to pop before
following the goto transition labeled by the nonterminal. *)type'gdesc='gnonterminalindex*int(** Table mapping compact indices to (nonterminal, offset) positions.
[desc] stores positions contiguously per nonterminal for O(1) access.
[zero] maps each nonterminal to the index of its position-0 entry,
enabling fast injection of (nt, 0) pairs. *)type'gtable={desc:('gt,'gdesc)vector;zero:('gnonterminal,'gtindex)vector;}(** Build a position table from a grammar.
For each nonterminal, allocates contiguous slots equal to the maximum
RHS length of its productions, plus a sentinel at index 0.
Total cardinality = 1 + sum of max production lengths per nonterminal. *)letmake(typeg)(g:ggrammar):gtable=letlength=Vector.make(Nonterminal.cardinalg)0inIndex.iter(Production.cardinalg)(funprod->length.@(Production.lhsgprod)<-Int.max(Production.lengthgprod));letopenConst(structtypet=gletcardinal=Vector.fold_left(+)(1+Vector.length_as_intlength)lengthend)inletdesc=Vector.make'n(fun()->Index.of_int(Nonterminal.cardinalg)0,0)inletenum=Index.enumerateninletzero=Vector.mapi(funntcount->letzero=enum()indesc.:(zero)<-(nt,0);fori=1tocountdodesc.:(enum())<-(nt,i);done;zero)lengthin{desc;zero}(** Inject a (nonterminal, offset) pair into a compact table index.
Raises [Assert_failure] if the offset is out of range. *)letinj(typeg)(p:gtable)ntpos=assert(pos>=0);letp0=p.zero.:(nt)inletpn=Index.of_int(Vector.lengthp.desc)((p0:>int)+pos)inlet(nt',pos')=p.desc.:(pn)inassert(Index.equalntnt');assert(pos=pos');pn(** Project a compact table index back to its (nonterminal, offset) pair. *)letprj(typeg)(p:gtable)pos=p.desc.:(pos)(** Navigate to the predecessor position in a reduction.
Returns [Left nt] if at position 0 for nonterminal [nt] (start of
a reduction — follow the goto transition labeled [nt]).
Returns [Right pos'] if at position > 0 (middle of a reduction —
[pos'] is the previous position, obtained via O(1) [Index.pred]). *)letprevious(typeg)(p:gtable)pos=matchp.desc.:(pos)with|(nt,0)->Either.Leftnt|_->Either.Right(Option.get(Index.predpos))(** Like [previous] but accepts an optional index.
Returns [Right Opt.none] when the input is [None].
Used in reduction graphs where positions may be absent. *)letprevious'(typeg)(p:gtable)pos=matchOpt.prjposwith|None->Either.RightOpt.none|Somepos'->matchp.desc.:(pos')with|(nt,0)->Either.Leftnt|_->Either.Right(Option.get(Index.predpos))(** Check whether the position is at offset 0 (start of a reduction). *)letis_zero(typeg)(p:gtable)pos=let_,pos=p.desc.:(pos)in(pos=0)