123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260openCoreopenAsync_smtp_typesmoduleEnvelope=Smtp_envelopemoduleCrypto=Crypto.CryptokitmoduleHash=Crypto.HashmoduleConfig=structmoduleHeader_cond=structtypet={name:Email_headers.Stable.Name.V1.t;if_:[`Containsofstring]option[@sexp.option]}[@@derivingsexp]endmoduleListed_header_cond=structtypet={name:Email_headers.Stable.Name.V1.t;if_:[`Containsofstring]option[@sexp.option];remove_duplicates:unitoption[@sexp.option]}[@@derivingsexp]endtypet={strip_whitespace:unitoption[@sexp.option];normalize_whitespace:Header_cond.tlist[@sexp.list];filter:Header_cond.tlist[@sexp.list];mask:Header_cond.tlist[@sexp.list];hash:Header_cond.tlist[@sexp.list];dedup:Header_cond.tlist[@sexp.list];sort_emails:Listed_header_cond.tlist[@sexp.list];sort_words:Listed_header_cond.tlist[@sexp.list];sort:bool[@sexp.bool]}[@@derivingsexp]letdefault={strip_whitespace=None;normalize_whitespace=[];filter=[];mask=[];hash=[];dedup=[];sort_emails=[];sort_words=[];sort=false};;letloadfile=Async.Reader.load_sexp_exnfilet_of_sexpendmoduleHeader=structtypet=Email_headers.Name.t*string[@@derivingcompare]endletmatch_headerconds=letconds=List.foldconds~init:Email_headers.Name.Map.empty~f:(funacc{Config.Header_cond.name;if_}->letcond~name:other~value:_=Email_headers.Name.equalnameotherinletcond=matchif_with|None->cond|Some(`Containss)->letre=Re2.escapes|>Re2.create_exninfun~name~value->cond~name~value&&Re2.matchesrevalueinMap.add_multiacc~key:name~data:cond)infun~name~value->matchMap.findcondsnamewith|Someconds->List.existsconds~f:(funcond->cond~name~value)|None->false;;letmatch_listed_headerconds=letconds=List.foldconds~init:Email_headers.Name.Map.empty~f:(funacc{Config.Listed_header_cond.name;if_;remove_duplicates}->letcond~name:other~value:_=Email_headers.Name.equalnameotherinletcond=matchif_with|None->cond|Some(`Containss)->letre=Re2.escapes|>Re2.create_exninfun~name~value->cond~name~value&&Re2.matchesrevalueinMap.add_multiacc~key:name~data:(cond,remove_duplicates))infun~name~value->matchMap.findcondsnamewith|None->None|Someconds->List.find_mapconds~f:(fun(cond,remove_duplicates)->ifcond~name~valuethenSomeremove_duplicateselseNone);;letstrip_whitespace_headers=Envelope.map_headers~normalize:`None~f:(fun~name:_~value->Email_headers.Value.to_string~normalize:`Whitespacevalue);;letnormalize_whitespaces=letreplacepatternreplacements=letregex=Re2.create_exnpatterninRe2.replace_exnregexs~f:(fun_m->replacement)inletmerge_spacess=replace"\\s\\s*"" "sinletnormalize_commass=replace"\\s*,\\s*"", "sinmerge_spacess|>normalize_commas|>String.strip;;let%expect_test_=lettests=printf"%S\n"(normalize_whitespaces)intest"";[%expect{|""|}];test" ";[%expect{|""|}];test"a b c d, e, f, g";[%expect{|"a b c d, e, f, g"|}];test" a b c\n d, e \r , \n f,g \n ";[%expect{|"a b c d, e, f, g"|}];;letnormalize_whitespace_headerscond=letcond=match_headercondinEnvelope.map_headers~normalize:`None~f:(fun~name~value->ifcond~name~valuethennormalize_whitespacevalueelsevalue);;letfilter_headerscond=letcond=match_headercondinEnvelope.filter_headers~f:(fun~name~value->not(cond~name~value));;lethash_headerscond=lethashdata=data|>Crypto.hash_string(Hash.sha256())|>Util.Hex.to_hex|>sprintf"[hidden : sha256 = %s]"inletcond=match_headercondinEnvelope.map_headers~normalize:`None~f:(fun~name~value->ifcond~name~valuethenhashvalueelsevalue);;letmask_headerscond=letcond=match_headercondinEnvelope.map_headers~normalize:`None~f:(fun~name~value->ifcond~name~valuethen"XXX"elsevalue);;letsort_emails_in_headerpattern=letf~remove_duplicatesvalue=letremove_duplicates=Option.is_someremove_duplicatesinList.sortvalue~compare:Email_address.compare|>(funl->ifremove_duplicatesthenList.remove_consecutive_duplicatesl~equal:Email_address.equalelsel)|>List.map~f:Email_address.to_stringinEnvelope.map_headers~normalize:`None~f:(fun~name~value->matchmatch_listed_headerpattern~name~valuewith|None->value|Someremove_duplicates->(matchEmail_address.list_of_stringvaluewith|Errore->(* Not an error since this is not a reason to trigger the kill
switch. *)Async.Log.Global.info"could not parse %s: %s"value(Error.to_string_hume);value|Okemails->f~remove_duplicatesemails|>String.concat~sep:", "));;letsort_words_in_headerpattern=letf~remove_duplicatesvalue=letremove_duplicates=Option.is_someremove_duplicatesinString.splitvalue~on:' '|>List.filter~f:(funs->not(String.is_emptys))|>List.sort~compare:String.compare|>(funl->ifremove_duplicatesthenList.remove_consecutive_duplicatesl~equal:String.equalelsel)|>String.concat~sep:" "inEnvelope.map_headers~normalize:`None~f:(fun~name~value->matchmatch_listed_headerpattern~name~valuewith|None->value|Someremove_duplicates->f~remove_duplicatesvalue);;letsort_headers=Envelope.modify_headers~f:(funheaders->Email_headers.to_list~normalize:`Noneheaders|>List.stable_sort~compare:Header.compare|>Email_headers.of_list~normalize:`None);;letsort_envelope_recipientsmessage=letrecipients=Envelope.recipientsmessage|>List.stable_sort~compare:Email_address.compareinEnvelope.setmessage~recipients();;letdedup_headersconds=letequal(name1,value1)(name2,value2)=ifmatch_headerconds~name:name1~value:value1&&match_headerconds~name:name2~value:value2thenEmail_headers.Name.equalname1name2elsefalseinEnvelope.modify_headers~f:(funheaders->Email_headers.to_list~normalize:`Noneheaders|>List.remove_consecutive_duplicates~equal|>Email_headers.of_list~normalize:`None);;lettransform{Config.strip_whitespace;normalize_whitespace;filter;sort;dedup;hash;mask;sort_emails;sort_words}message=letmaybe_sortmessage=ifnotsortthenmessageelsemessage|>sort_headers|>sort_envelope_recipientsinletmessage=matchstrip_whitespacewith|None->message|Some()->strip_whitespace_headersmessageinnormalize_whitespace_headersnormalize_whitespacemessage|>filter_headersfilter(* Sorting twice since we want to dedup before masking, to sort before
deduping, and to sort after masking. *)|>maybe_sort|>dedup_headersdedup|>hash_headershash|>mask_headersmask|>sort_emails_in_headersort_emails|>sort_words_in_headersort_words|>maybe_sort;;