Source file encoded_word.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
open Core
open Angstrom
module Let_syntax = struct
let bind t ~f = t >>= f
let map t ~f = t >>| f
let both a b = lift2 Tuple2.create a b
end
let ws = take_while1 Char.is_whitespace
let charset =
choice
[ string_ci "US-ASCII" >>| const `Ascii
; string_ci "UTF-8" >>| const `Utf8
; string_ci "ISO-8859-1" >>| const `Latin1
; string_ci "ISO-8859-2" >>| const `Latin2
; string_ci "GB2312" >>| const `GB2312
; string_ci "WINDOWS-1252" >>| const `Windows1252
]
;;
let encoding : [ `Base64 | `Quoted_printable ] Angstrom.t =
choice [ string_ci "B" >>| const `Base64; string_ci "Q" >>| const `Quoted_printable ]
;;
let parser_ : string Angstrom.t =
let%bind () = string "=?" >>| ignore
and charset = charset
and () = string "?" >>| ignore
and encoding = encoding
and () = string "?" >>| ignore
and data =
take_while (function
| '?' -> false
| c -> (not (Char.is_whitespace c)) && Char.is_print c)
and () = string "?=" >>| ignore in
let%bind data =
match encoding with
| `Quoted_printable ->
let data = String.substr_replace_all data ~pattern:"_" ~with_:" " in
let data_bstr, _ =
Quoted_printable_lexer.decode_quoted_printable
(String.length data)
(Lexing.from_string data)
in
return (Bigbuffer.contents data_bstr)
| `Base64 ->
(match Base64.decode data with
| Ok data -> return data
| Error (`Msg msg) -> fail msg)
in
match charset with
| `Ascii | `Utf8 | `Latin1 | `Latin2 | `GB2312 | `Windows1252 -> return data
;;
let parser_many : string Angstrom.t =
many
(choice
[ (let%map hd = parser_
and tl =
many
(let%bind (_ : string) = option "" ws in
parser_)
in
hd :: tl)
; (let%map c =
choice
[ take_while1 (function
| '=' -> false
| c -> not (Char.is_whitespace c))
; string "="
; (let%bind (_ : string) = choice [ string "\r\n"; string "\n" ] in
ws)
; ws
]
in
[ c ])
])
>>| List.concat
>>| String.concat ~sep:""
;;
let decode str =
Angstrom.parse_string ~consume:Prefix parser_many str
|> Result.map_error ~f:Error.of_string
;;