(* Source file norm.ml *)

(** What to do with the pieces flagged as matches when a string is split. *)
type behavior =
  | Remove  (** Drop the flagged pieces from the output. *)
  | Isolate  (** Keep every piece, flagged or not, as its own element. *)
  | Merge_with_previous
      (** Append a flagged piece to the unflagged piece right before it. *)
  | Merge_with_next
      (** Prepend a flagged piece to the unflagged piece right after it. *)

(** [norm matches behavior] converts [matches], a sequence of pieces carrying
    a string [str] and a flag [is_match], into a sequence of strings. Flagged
    pieces are handled as requested by [behavior]:
    - [Remove] drops them;
    - [Isolate] keeps every piece as its own element;
    - [Merge_with_previous] appends a flagged piece to the unflagged piece
      right before it;
    - [Merge_with_next] prepends a flagged piece to the unflagged piece right
      after it.

    The two merging behaviours fold over the whole of [matches] as soon as
    [norm] is fully applied; [Remove] and [Isolate] only wrap the sequence. *)
let norm matches behavior =
  (* Shared driver for both merging behaviours. [glue before current] receives
     the [is_match] flags of the last emitted piece and of the incoming one,
     and tells whether the incoming string must be concatenated onto the most
     recent output string. [last] is reset to [None] right after a
     concatenation, so a merged string is never extended a second time.
     Output strings are accumulated newest-first and reversed at the end. *)
  let merge_adjacent glue =
    let step (last, rev_out) piece =
      let push () = (Some piece, piece.S.str :: rev_out) in
      match (last, rev_out) with
      | Some { S.is_match = before; _ }, newest :: older ->
          if glue before piece.S.is_match
          then (None, (newest ^ piece.S.str) :: older)
          else push ()
      | Some _, [] | None, _ -> push ()
    in
    let _, rev_out = Seq.fold_left step (None, []) matches in
    List.to_seq (List.rev rev_out)
  in
  match behavior with
  | Remove ->
      Seq.filter_map
        (fun piece -> if piece.S.is_match then None else Some piece.S.str)
        matches
  | Isolate -> Seq.map (fun piece -> piece.S.str) matches
  | Merge_with_previous ->
      (* unflagged piece followed by a flagged one *)
      merge_adjacent (fun before current -> (not before) && current)
  | Merge_with_next ->
      (* flagged piece followed by an unflagged one *)
      merge_adjacent (fun before current -> before && not current)

(** [split ~encoding ~pattern ?behavior str] runs the [find_matches] function
    of the first-class module [pattern] on [str] with the given [encoding],
    then hands the resulting pieces to [norm].

    [behavior] defaults to [Remove]. *)
let split ~encoding ~pattern ?(behavior = Remove) str =
  let module P = (val pattern : S.PATTERN) in
  norm (P.find_matches ~encoding str) behavior

(** [split_on_bstr ~encoding ~pattern ?behavior bstr] runs the
    [find_matches_on_bstr] function of the first-class module [pattern] on
    [bstr] with the given [encoding], then hands the resulting pieces to
    [norm].

    [behavior] defaults to [Remove]. *)
let split_on_bstr ~encoding ~pattern ?(behavior = Remove) bstr =
  let module P = (val pattern : S.PATTERN) in
  norm (P.find_matches_on_bstr ~encoding bstr) behavior