Source file punycode.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
(** Punycode (RFC 3492) codec. *)

let base = 36
let tmin = 1
let tmax = 26
let skew = 38
let damp = 700
let initial_bias = 72
let initial_n = 0x80

let decode_digit c =
  match c with
  | '0'..'9' -> Char.code c - 22
  | 'A'..'Z' -> Char.code c - 65
  | 'a'..'z' -> Char.code c - 97
  | _ -> -1

let adapt delta num_points first_time =
  let delta = ref (if first_time then delta / damp else delta asr 1) in
  delta := !delta + !delta / num_points;
  let k = ref 0 in
  while !delta > ((base - tmin) * tmax) / 2 do
    delta := !delta / (base - tmin);
    k := !k + base
  done;
  !k + (base - tmin + 1) * !delta / (!delta + skew)

let decode input =
  if String.length input = 0 then Error "empty input"
  else
  let basic, encoded =
    match String.rindex_opt input '-' with
    | Some pos ->
      (String.sub input 0 pos,
       String.sub input (pos + 1) (String.length input - pos - 1))
    | None -> ("", input)
  in
  let output = Array.make (String.length input * 2 + 10) 0 in
  let out_len = ref 0 in
  (* Copy basic codepoints โ€” must all be <= 0x7F per RFC 3492 ยง5 *)
  let basic_ok = ref true in
  String.iter (fun c ->
    let code = Char.code c in
    if code > 0x7F then basic_ok := false
    else begin
      output.(!out_len) <- code;
      incr out_len
    end
  ) basic;
  if not !basic_ok then
    Error "non-basic byte in basic segment"
  else
  let n = ref initial_n in
  let i = ref 0 in
  let bias = ref initial_bias in
  let ic = ref 0 in
  let len = String.length encoded in
  try
    while !ic < len do
      let oldi = !i in
      let w = ref 1 in
      let k = ref base in
      let cont = ref true in
      while !cont do
        if !ic >= len then raise Exit;
        let digit = decode_digit encoded.[!ic] in
        incr ic;
        if digit < 0 || digit >= base then raise Exit;
        i := !i + digit * !w;
        let t =
          if !k <= !bias + tmin then tmin
          else if !k >= !bias + tmax then tmax
          else !k - !bias
        in
        if digit < t then cont := false
        else begin
          w := !w * (base - t);
          k := !k + base
        end
      done;
      let out = !out_len + 1 in
      bias := adapt (!i - oldi) out (oldi = 0);
      n := !n + !i / out;
      if !n > 0x10FFFF then raise Exit;
      i := !i mod out;
      (* Insert n at position i *)
      let pos = !i in
      Array.blit output pos output (pos + 1) (!out_len - pos);
      output.(pos) <- !n;
      out_len := out;
      i := !i + 1
    done;
    Ok (Array.to_list (Array.sub output 0 !out_len))
  with Exit -> Error "invalid punycode"

let encode_digit d =
  if d < 26 then Char.chr (d + 97)      (* a-z *)
  else Char.chr (d - 26 + 48)           (* 0-9 *)

let encode input =
  try
  let buf = Buffer.create 64 in
  let n = ref initial_n in
  let delta = ref 0 in
  let bias = ref initial_bias in
  (* Copy basic codepoints (lowercased per Section 7.1) *)
  let basic_count = ref 0 in
  List.iter (fun cp ->
    if cp < 0x80 then begin
      let c = Char.chr cp in
      Buffer.add_char buf (Char.lowercase_ascii c);
      incr basic_count
    end
  ) input;
  if !basic_count > 0 then Buffer.add_char buf '-';
  let h = ref !basic_count in
  let len = List.length input in
  while !h < len do
    (* Find minimum codepoint >= n *)
    let m = ref max_int in
    List.iter (fun cp -> if cp >= !n && cp < !m then m := cp) input;
    (* Increase delta for skipped codepoints โ€” overflow check (Section 6.4) *)
    let step = !m - !n in
    if step > (max_int - !delta) / (!h + 1) then
      raise Exit;
    delta := !delta + step * (!h + 1);
    n := !m;
    List.iter (fun cp ->
      if cp < !n then incr delta
      else if cp = !n then begin
        (* Encode delta as variable-length integer *)
        let q = ref !delta in
        let k = ref base in
        let cont = ref true in
        while !cont do
          let t =
            if !k <= !bias + tmin then tmin
            else if !k >= !bias + tmax then tmax
            else !k - !bias
          in
          if !q < t then begin
            Buffer.add_char buf (encode_digit !q);
            cont := false
          end else begin
            Buffer.add_char buf (encode_digit (t + (!q - t) mod (base - t)));
            q := (!q - t) / (base - t);
            k := !k + base
          end
        done;
        bias := adapt !delta (!h + 1) (!h = !basic_count);
        delta := 0;
        incr h
      end
    ) input;
    incr delta;
    incr n
  done;
  Ok (Buffer.contents buf)
  with Exit -> Error "overflow"