Source file lrc.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
(* MIT License
 *
 * Copyright (c) 2025 Frédéric Bour
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *)

(** LRC (LR with classes) — implementation.

    Constructs the LRC graph by iterating over LR1 states and their lookahead
    classes, building transition relations, and computing transitive closure
    via Tarjan's algorithm. Provides two construction paths: the full LRC
    ([make]) and a minimized version ([make_minimal]) using Valmari's DFA
    minimization algorithm.

    Implementation notes:
    - [make] allocates LRC states contiguously per LR1 state, using index
      offsets to encode the (LR1 state, class index) pairing efficiently.
    - [make_minimal] builds the LRC graph on-the-fly during a backward
      traversal from LR1 states, then minimizes with Valmari's algorithm.
    - [from_entrypoints] computes reachability from a subset of states and
      lazily evaluates prefix paths for counterexample generation.
    - FIXME: Prefixes are minimal in number of symbols, not in number of
      terminals, which is ultimately what we care about when generating
      counter-examples. A short prefix can expand to a long sentence.
*)

open Utils
open Misc
open Fix.Indexing
open Info

type ('g, 'n) t = {
  lr1_of: ('n, 'g lr1 index) vector;
  lrcs_of: ('g lr1, 'n indexset) vector;
  all_wait: 'n indexset;
  all_leaf: 'n indexset;
  all_successors: ('n, 'n indexset) vector;
  reachable_from: ('n, 'n indexset) vector;
}

(* Number of LRC states in the structure *)
let count t = Vector.length t.lr1_of

(* Compute the difference between two indices *)
let index_delta (type n) (i : n index) (j : n index) =
  (i :> int) - (j :> int)

(* Compute the index of the class of an LRC state *)
let class_index t lrc =
  index_delta lrc (Option.get (IndexSet.minimum t.lrcs_of.:(t.lr1_of.:(lrc))))

module N = Unsafe_cardinal()
type 'g n = 'g N.t

(* Build the LRC structure from grammar and reachability analysis.
     Creates one LRC state per (LR1 state, lookahead class) pair, computes
     transition relations, and closes them transitively with Tarjan's algorithm. *)
let make (type g) (g : g grammar) ((module Reachability) : g Reachability.t) =
  (* Compute the total number of LRC states *)
  let n =
    let count lr1 = Array.length (Reachability.Classes.for_lr1 lr1) in
    let sum = ref 0 in
    Index.iter (Lr1.cardinal g) (fun lr1 -> sum := !sum + count lr1);
    !sum
  in

  (* Lift `n` to type-level *)
  let open N.Const(struct type t = g let cardinal = n end) in

  (* Shift an index by a given offset *)
  let index_shift (i : n index) offset =
    Index.of_int n ((i :> int) + offset)
  in

  (* Map from LRC states to their corresponding LR1 states *)
  let lr1_of = Vector.make' n (fun () -> Index.of_int (Lr1.cardinal g) 0) in

  (* Map from LR1 states to their corresponding set of LRC states *)
  let lrcs_of =
    let count = ref 0 in
    let init_lr1 lr1 =
      let classes = Reachability.Classes.for_lr1 lr1 in
      assert (Array.length classes > 0);
      let first = Index.of_int n !count in
      count := !count + Array.length classes;
      let all = ref IndexSet.empty in
      for i = Array.length classes - 1 downto 0 do
        let lrc = index_shift first i in
        all := IndexSet.add lrc !all;
        Vector.set lr1_of lrc lr1
      done;
      !all
    in
    Vector.init (Lr1.cardinal g) init_lr1
  in

  (* Map from LR1 states to their first LRC state *)
  let first_lrc_of =
    Vector.map (fun x -> Option.get (IndexSet.minimum x)) lrcs_of
  in

  (* Set of wait LRC states *)
  let all_wait = IndexSet.map (Vector.get first_lrc_of) (Lr1.wait g) in

  (* Step timing after computing the LRC set *)
  stopwatch 2 "Allocated LRC set (%d elements)" (cardinal n);

  (* Compute transitions for each LRC state *)
  let predecessors = Vector.make n IndexSet.empty in
  let process lr1 =
    let tgt_first = first_lrc_of.:(lr1) in
    match Option.map (Symbol.desc g) (Lr1.incoming g lr1) with
    | None -> ()
    | Some (T t) ->
      predecessors.:(tgt_first) <-
        IndexSet.map (fun tr ->
            let src = Transition.source g tr in
            let classes = Reachability.Classes.for_lr1 src in
            let class_index _ classe = IndexSet.mem t classe in
            index_shift first_lrc_of.:(src)
              (Misc.array_findi class_index 0 classes)
          ) (Transition.predecessors g lr1)
    | Some _ ->
      let process_transition tr =
        let node = Reachability.Tree.leaf tr in
        let encode = Reachability.Cell.encode node in
        let pre_classes = Reachability.Classes.pre_transition tr in
        let post_classes = Reachability.Classes.post_transition tr in
        let coercion =
          Reachability.Coercion.infix post_classes
            (Reachability.Classes.for_lr1 lr1)
        in
        let src_first = first_lrc_of.:(Transition.source g tr) in
        let pre_classes = Array.length pre_classes in
        let post_classes = Array.length post_classes in
        for post = 0 to post_classes - 1 do
          let reachable = ref IndexSet.empty in
          for pre = 0 to pre_classes - 1 do
            if Reachability.Analysis.cost (encode ~pre ~post) < max_int then
              reachable := IndexSet.add (index_shift src_first pre) !reachable
          done;
          let reachable = !reachable in
          Array.iter (fun index ->
              predecessors.@(index_shift tgt_first index) <- IndexSet.union reachable)
            coercion.forward.(post)
        done
      in
      IndexSet.rev_iter process_transition (Transition.predecessors g lr1)
  in
  Index.rev_iter (Lr1.cardinal g) process;
  stopwatch 2 "Computed LRC transitions/predecessors";
  let all_successors = Misc.relation_reverse n predecessors in
  stopwatch 2 "Computed LRC successors";
  let reachable_from = Vector.copy all_successors in
  Tarjan.close_relation (fun f i -> IndexSet.iter f all_successors.:(i)) reachable_from;
  stopwatch 2 "Closed LRC successors";
  let all_leaf = IndexSet.all n in
  {lr1_of; lrcs_of; all_wait; all_leaf; all_successors; reachable_from}

(* Convert an LRC state to a string representation *)
let to_string g t lrc =
  Printf.sprintf "%s/%d"
    (Lr1.to_string g t.lr1_of.:(lrc))
    (class_index t lrc)

(* Convert a set of LRC states to a string representation *)
let set_to_string info t lrcs =
  string_of_indexset ~index:(to_string info t) lrcs

type 'n entrypoints = {
  reachable: 'n indexset;
  wait: 'n indexset;
  entrypoints: 'n indexset;
  successors: ('n, 'n indexset) vector;
  predecessors: ('n, 'n indexset) vector;
  some_prefix: 'n index -> int * 'n index list;
  (** [some_prefix state] returns a prefix to reach [state] from an entrypoint.
      The prefix's length and the sequence of states (excluding [state]) are
      given, starting from the end.
      Thus, [List.rev (state :: some_prefix state)] is a valid prefix. *)
}

(* Find states reachable from specific states by closing over raw reachability
   information *)
let from_entrypoints (type g n) (g: g grammar) lrc_graph entrypoints : n entrypoints =
  (* Set of reachable states *)
  let reachable = ref IndexSet.empty in
  (* Compute transitive successors for each state and populate reachable set *)
  let successors =
    let table = Vector.make (count lrc_graph) IndexSet.empty in
    let todo = ref entrypoints in
    let populate i =
      reachable := IndexSet.add i !reachable;
      if IndexSet.is_empty table.:(i) then (
        let succ = lrc_graph.all_successors.:(i) in
        todo := IndexSet.union succ !todo;
        Vector.set table i succ;
      )
    in
    while IndexSet.is_not_empty !todo do
      let todo' = !todo in
      todo := IndexSet.empty;
      IndexSet.rev_iter populate todo';
    done;
    table
  in

  (* Compute predecessors for each state using successors information *)
  let predecessors = Misc.relation_reverse (count lrc_graph) successors in

  (* Final reachable states *)
  let reachable = !reachable in

  (* Wait states that are reachable *)
  let wait = IndexSet.inter lrc_graph.all_wait reachable in

  (* Compute a prefix to reach each state.
     The prefix is represented as (length, path) to avoid reconstructing
     the path when only the length is needed for coverage analysis. *)
  let some_prefix =
    let table = lazy (
      let table = Vector.make (count lrc_graph) (0, []) in
      let todo = ref [] in
      let expand prefix state =
        match Vector.get table state with
        | (0, []) ->
          Vector.set table state prefix;
          let length, prefix = prefix in
          let prefix = (length + 1, state :: prefix) in
          let succ = successors.:(state) in
          if IndexSet.is_not_empty succ then
            push todo (succ, prefix)
        | _ -> ()
      in
      Vector.iteri (fun lr1 lrcs ->
          if Option.is_none (Lr1.incoming g lr1) then
            expand (0, []) (Option.get (IndexSet.minimum lrcs)))
        lrc_graph.lrcs_of;
      let propagate (succ, prefix) =
        IndexSet.iter (expand prefix) succ
      in
      fixpoint ~propagate todo;
      table
    )
    in
    (fun st -> Vector.get (Lazy.force table) st)
  in
  {reachable; wait; entrypoints; successors; predecessors; some_prefix}

(* Debugging: verify the LRC automaton is deterministic by traversing all
     (LR1 state, class) pairs backward and checking for dead-ends. *)
let check_deterministic (type g) (g : g grammar) ((module Reachability) : g Reachability.t) =
  let prefix = Vector.make (Lr1.cardinal g) [] in
  let rec loop next = function
    | [] -> if not (list_is_empty next) then loop [] next
    | xs :: xxs ->
      let x = List.hd xs in
      let next =
        if list_is_empty prefix.:(x)
        then (
          prefix.:(x) <- xs;
          IndexSet.fold
            (fun tr next -> (Transition.target g tr :: xs) :: next)
            (Transition.successors g x) next
        ) else next
      in
      loop next xxs
  in
  loop [] (List.map (fun x -> [x]) (IndexSet.elements (Lr1.entrypoints g)));
  let todo = ref [] in
  let table = Hashtbl.create 7 in
  let visit path lr1 classes =
    let key = (lr1, classes) in
    let path = key :: path in
    let print_class cl =
      if cl == Terminal.all g then
        "T"
      else
        string_concat_map ~wrap:("{","}") ","
          (Terminal.to_string g)
          (List.rev (IndexSet.elements cl))
    in
    let print (lr1, classes) =
      let all_classes = Reachability.Classes.for_lr1 lr1 in
      Printf.sprintf "%s@{%s}"
        (Lr1.to_string g lr1)
        (string_concat_map ", "
           (fun i -> print_class all_classes.(i))
           (IntSet.elements classes))
    in
    if IntSet.is_empty classes then
      Printf.eprintf "Found dead-end: %s\n prefix: %s\n starting classes: %s\n"
        (string_concat_map " -> " print path)
        (Lr1.list_to_string g (List.rev (List.tl prefix.:(lr1))))
        (let top, _ = List.hd (List.rev path) in
         string_concat_map ~wrap:("{","}") ","
           print_class
           (Array.to_list (Reachability.Classes.for_lr1 top))
        )
    else if not (Hashtbl.mem table key) then (
      Hashtbl.add table key ();
      push todo path
    )
  in
  let propagate path =
    let (target, post_class_indices) = List.hd path in
    IndexSet.iter begin fun tr ->
      let source = Transition.source g tr in
      let node = Reachability.Tree.leaf tr in
      let encode = Reachability.Cell.encode node in
      let pre_classes = Reachability.Classes.for_lr1 source in
      let pre_class_indices =
        match Transition.split g tr with
        | R sh ->
          let term = Transition.shift_symbol g sh in
          let pre =
            Utils.Misc.array_findi
              (fun _ cb -> IndexSet.mem term cb)
              0 pre_classes
          in
          if Reachability.Analysis.cost (encode ~pre ~post:0) < max_int then
            IntSet.singleton pre
          else
            IntSet.empty
        | L gt ->
          let post_classes = Reachability.Classes.for_lr1 target in
          let mid_classes = Reachability.Classes.for_edge gt in
          IntSet.bind post_class_indices begin fun post_index ->
            let ca = post_classes.(post_index) in
            match
              Utils.Misc.array_findi
                (fun _ cb -> IndexSet.quick_subset ca cb) 0 mid_classes
            with
            | exception Not_found -> IntSet.empty
            | mid_index ->
              IntSet.init_subset 0 (Array.length pre_classes - 1)
                (fun pre -> Reachability.Analysis.cost (encode ~pre ~post:mid_index) < max_int)
          end
      in
      visit path source pre_class_indices
    end (Transition.predecessors g target)
  in
  Index.iter (Lr1.cardinal g) (fun lr1 ->
      let len = Array.length (Reachability.Classes.for_lr1 lr1) in
      if len > 0 then
        visit [] lr1 (IntSet.init_interval 0 (len - 1))
    );
  fixpoint ~propagate todo;
  stopwatch 2 "Determinized Lrc all: %d states" (Hashtbl.length table)

module Mlrc = Unsafe_cardinal()
type 'g mlrc = 'g Mlrc.t

(* Build a minimal LRC structure.
     Constructs the graph lazily during backward traversal, then applies
     Valmari's DFA minimization to merge equivalent states. *)
let make_minimal (type g) (g : g grammar) ((module Reachability) : g Reachability.t) : (g, g mlrc) t =
  let open IndexBuffer in
  let module State = Gen.Make() in
  let module Transitions = Gen.Make() in
  let states = State.get_generator () in
  let transitions = Transitions.get_generator () in
  let table = Hashtbl.create 7 in
  let todo = ref [] in
  let visit lr1 classes =
    assert (not (IntSet.is_empty classes));
    let key = (lr1, classes) in
    match Hashtbl.find_opt table key with
    | Some index -> index
    | None ->
      let index = Gen.add states key in
      Hashtbl.add table key index;
      push todo index;
      index
  in
  let propagate index =
    let (target, post_class_indices) = Gen.get states index in
    IndexSet.iter begin fun tr ->
      let source = Transition.source g tr in
      let node = Reachability.Tree.leaf tr in
      let encode = Reachability.Cell.encode node in
      let pre_classes = Reachability.Classes.for_lr1 source in
      let pre_class_indices =
        match Transition.split g tr with
        | R sh ->
          let term = Transition.shift_symbol g sh in
          let pre =
            Utils.Misc.array_findi
              (fun _ cb -> IndexSet.mem term cb)
              0 pre_classes
          in
          if Reachability.Analysis.cost (encode ~pre ~post:0) < max_int then
            IntSet.singleton pre
          else
            IntSet.empty
        | L gt ->
          let post_classes = Reachability.Classes.for_lr1 target in
          let mid_classes = Reachability.Classes.for_edge gt in
          IntSet.bind post_class_indices begin fun post_index ->
            let ca = post_classes.(post_index) in
            match
              Utils.Misc.array_findi
                (fun _ cb -> IndexSet.quick_subset ca cb) 0 mid_classes
            with
            | exception Not_found -> IntSet.empty
            | mid_index ->
              IntSet.init_subset 0 (Array.length pre_classes - 1)
                (fun pre -> Reachability.Analysis.cost (encode ~pre ~post:mid_index) < max_int)
          end
      in
      if not (IntSet.is_empty pre_class_indices) then
        let index' = visit source pre_class_indices in
        ignore (Gen.add transitions (index, index'))
    end (Transition.predecessors g target)
  in
  let fast_map s f =
    let l = IndexSet.fold (fun x acc -> match f x with None -> acc | Some y -> y :: acc) s [] in
    IndexSet.of_list l
  in
  let visit_state lr1 =
    let len = Array.length (Reachability.Classes.for_lr1 lr1) in
    if len > 0 then
      let set = IntSet.init_interval 0 (len - 1) in
      Some (visit lr1 set)
    else
      None
  in
  let all_wait = fast_map (Lr1.wait g) visit_state in
  fixpoint ~propagate todo;
  stopwatch 2 "Determinized Lrc wait: %d states" (Hashtbl.length table);
  let all_leaf = fast_map (Lr1.all g) visit_state in
  fixpoint ~propagate todo;
  stopwatch 2 "Determinized Lrc all: %d states" (Hashtbl.length table);
  (*IndexSet.iter (fun st ->
      Printf.eprintf "lrc%d has label %s\n" (Index.to_int st) (Lr1.to_string g (fst (Gen.get states st)))
    ) all_wait;*)
  (*Hashtbl.iter (fun (lr1, classes) state ->
      if IndexSet.mem lr1 (Lr1.entrypoints g) then (
        Printf.eprintf
          "LRC%d represent entrypoint %s with classes %s (lr1 has %d classes)\n"
          (Index.to_int state)
          (Lr1.to_string g lr1)
          (string_concat_map ~wrap:("{","}") "," string_of_int (IntSet.elements classes))
          (Array.length (Reachability.Classes.for_lr1 lr1))
      )
    ) table;*)
  let module Min =
    Valmari.Minimize(struct
      type t = g lr1 index
      let compare = Index.compare
    end)(struct
      let source tr = fst (Gen.get transitions tr)
      let target tr = snd (Gen.get transitions tr)
      let label tr = fst (Gen.get states (source tr))

      let initials f = IndexSet.iter f all_leaf

      let finals f =
        IndexSet.iter (fun lr1 ->
            match Hashtbl.find_opt table (lr1, IntSet.singleton 0) with
            | None -> assert false
            | Some state ->
              (*Printf.eprintf "Marking entrypoint %d: %s\n" (Index.to_int state) (Lr1.to_string g lr1);*)
              f state
          ) (Lr1.entrypoints g)

      let refinements f =
        IndexSet.iter (fun lr1 ->
            match Hashtbl.find_opt table (lr1, IntSet.singleton 0) with
            | None -> ()
            | Some index -> f (fun ~add -> add index)
          ) (Lr1.entrypoints g)

      type states = State.n
      let states = State.n
      type transitions = Transitions.n
      let transitions = Transitions.n
    end)
  in
  stopwatch 2 "Minimized deterministic Lrc: %d states" (cardinal Min.states);
  (* Lr1 of each state *)
  let lr1_of =
    Vector.init Min.states
      (fun st -> fst (Gen.get states (Min.represent_state st)))
  in
  (* Lrcs of each lr1 state *)
  let lrcs_of = Vector.make (Lr1.cardinal g) IndexSet.empty in
  Vector.rev_iteri (fun lrc lr1 -> lrcs_of.@(lr1) <- IndexSet.add lrc)
    lr1_of;
  (* Sanity check: single lr1 per lrc *)
  Index.iter State.n (fun st ->
      match Min.transport_state st with
      | None -> ()
      | Some mst ->
        let st' = Min.represent_state mst in
        assert (st = st' || fst (Gen.get states st) = lr1_of.:(mst))
    );
  let all_wait = IndexSet.filter_map Min.transport_state all_wait in
  let all_leaf = IndexSet.filter_map Min.transport_state all_leaf in
  let all_successors = Vector.make Min.states IndexSet.empty in
  Index.rev_iter Min.transitions
    (fun tr -> all_successors.@(Min.target tr) <- IndexSet.add (Min.source tr));
  (* Reachable *)
  let reachable_from = Vector.copy all_successors in
  Tarjan.close_relation
    (fun f i -> IndexSet.iter f all_successors.:(i))
    reachable_from;
  stopwatch 2 "Minimal Lrc ready";
  (* Lift `Min.states` to type-level *)
  let open Mlrc.Eq(struct type t = g type n = Min.states let n = Min.states end) in
  let Refl = eq in
  {lr1_of; lrcs_of; all_wait; all_leaf; all_successors; reachable_from}