123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149openAngstromletis_ascii=function'\000'..'\127'->true|_->falseexceptionNot_satisfytypet={normalized:string;raw:string}letppppft=Fmt.pfppf"{ @[<hov>normalized = %s;@ raw = @[<hov>%a@]@] }"t.normalizedUtils.pp_stringt.rawletfailf=Fmt.kstrffailletwith_uutfis=letres=Buffer.create16inlettmp=Bytes.create1inletdec=Uutf.decoder~encoding:`UTF_8`Manualinletnot_satisfy=reffalseinscan(Uutf.decodedec)(funstatechr->letres=matchstatewith|`Await->Somestate|`End->None|`Ucharuchar->Uutf.Buffer.add_utf_8resuchar;Somestate|`Malformed_->Uutf.Buffer.add_utf_8resUutf.u_rep;Somestateinletres=matchreswith|Somestate->if(is_asciichr&&ischr)||not(is_asciichr)then(Bytes.unsafe_settmp0chr;Uutf.Manual.srcdectmp01;Some(Uutf.decodedec))else(not_satisfy:=true;None)|None->Noneinres)>>=fun(consumed,state)->match!not_satisfywith|false->(* assert (state = `End) ;
TODO: assert false with [parse_string unstructured
"p=MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC24+ \
oWY3VEvkeJ8ZYCpp00YX61+Yyya6mgxgx6fbjUqAgaaqq \
DdQoByt05XUYMKFV7Zs+wbtqNlJe15jUActNAu06LQnrM \
Hhtdmepad/8jYR8YRhoPppKG6MmDlIRkzcmAA/E8ZZF7h \
gaAiOvCtnoTY0/ZTabr3wC9NPpiL5tn1QIDAQAB" *)letnormalized=Buffer.contentsresinBuffer.clearres;return{normalized;raw=consumed;}|true->(* XXX(dinosaure): if we retrieve [not_satisfy = true], [state] was already
computed by [scan]. We need to signal to [dec] end of input and compute
returned and last state. *)Uutf.Manual.srcdecBytes.empty00;(* `End *)matchUutf.decodedecwith|`Await->assertfalse|`End->letnormalized=Buffer.contentsresinBuffer.clearres;return{normalized;raw=consumed;}|`Ucharuchar->assert(Uutf.decodedec=`End);Uutf.Buffer.add_utf_8resuchar;letnormalized=Buffer.contentsresinBuffer.clearres;return{normalized;raw=consumed;}|`Malformed_->assert(Uutf.decodedec=`End);Uutf.Buffer.add_utf_8resUutf.u_rep;letnormalized=Buffer.contentsresinBuffer.clearres;return{normalized;raw=consumed;}(*
let with_uutf is =
let res = Buffer.create 16 in
let raw = Buffer.create 16 in
let tmp = Bytes.create 1 in
let dec = Uutf.decoder ~encoding:`UTF_8 `Manual in
let cut = ref false in
scan (Uutf.decode dec) (fun state chr ->
try
let () =
match state with
| `Await | `End -> ()
| `Malformed _ -> Uutf.Buffer.add_utf_8 res Uutf.u_rep
| `Uchar uchar when Uchar.is_char uchar ->
if is (Uchar.to_char uchar) then
Buffer.add_char res (Uchar.to_char uchar)
else raise Not_satisfy
| `Uchar uchar -> Uutf.Buffer.add_utf_8 res uchar
in
Bytes.set tmp 0 chr ;
Uutf.Manual.src dec tmp 0 1 ;
if is_ascii chr && not (is chr) then (
cut := true ;
raise Not_satisfy ) ;
Buffer.add_char raw chr ; (* valid [char]. *)
Some (Uutf.decode dec)
with Not_satisfy -> None )
>>= fun (_, state) ->
( match state with
| `Await ->
Uutf.Manual.src dec tmp 0 1 ;
let () =
match Uutf.decode dec with
| `Await | `Malformed _ -> Uutf.Buffer.add_utf_8 res Uutf.u_rep
| `Uchar uchar when Uchar.is_char uchar ->
if is (Uchar.to_char uchar) then
Buffer.add_char res (Uchar.to_char uchar)
| `Uchar uchar -> Uutf.Buffer.add_utf_8 res uchar
| `End -> ()
in
return { normalized= Buffer.contents res
; raw= Buffer.contents raw }
| `Malformed _ ->
Uutf.Buffer.add_utf_8 res Uutf.u_rep ;
return { normalized= Buffer.contents res
; raw= Buffer.contents raw }
| `Uchar uchar when Uchar.is_char uchar ->
if (not !cut) && is (Uchar.to_char uchar) then
Buffer.add_char res (Uchar.to_char uchar) ;
return { normalized= Buffer.contents res
; raw= Buffer.contents raw }
| `Uchar uchar ->
Uutf.Buffer.add_utf_8 res uchar ;
return { normalized= Buffer.contents res
; raw= Buffer.contents raw }
| `End -> return { normalized= Buffer.contents res
; raw= Buffer.contents raw } )
>>= fun r -> Buffer.clear res ; Buffer.clear raw ; return r
*)letwith_uutf_without_rawis=with_uutfis>>|fun{normalized;_}->normalizedletwith_uutf1is=with_uutfis>>=funr->ifString.lengthr.raw>0thenreturnrelsefailf"with_uutf1: string is empty @[<hov>%a@]"pprletwith_uutf1_without_rawis=with_uutf1is>>|fun{normalized;_}->normalized