123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410(* This file is free software, part of Zipperposition. See file "license" for more details. *)(** {1 Fingerprint term indexing} *)moduleT=TermmoduleI=IndexmoduleS=Substletprof_traverse=ZProf.make"fingerprint.traverse"(* a feature.
A = variable
B = below variable
DB i = De-Bruijn index i
N = invalid position
S c = symbol c
BI s = Built-in symbol s
*)typefeature=A|B|DBofint|N|SofID.t|BIofBuiltin.t|Ignore(* a fingerprint function, it computes several features of a term *)typefingerprint_fun=T.t->featurelist(* TODO: use a feature array, rather than a list *)(* TODO: more efficient implem of traversal, only following branches that
are useful instead of folding and filtering *)letexpand_otf_body=letextra_args=Type.expected_args(Term.tybody)inifCCList.is_emptyextra_argsthenbodyelse(letn=List.lengthextra_argsinT.app(T.DB.shiftnbody)(List.mapi(funity->T.bvar~ty(n-1-i))extra_args))(* compute a feature for a given position *)letrecgfpf?(depth=0)post=letif_and_ort=matchT.viewtwith|T.AppBuiltin(Builtin.(And|Or),_)->true|_->falseinletunfoldt=ifT.is_apptthenT.as_apptelse(matchT.viewtwith|T.AppBuiltin(hd,args)->(* return anything as a head, as it is only important that it is not a variable *)T.true_,args|_->t,[])inlett_ty=Term.tytinletexp_args_num=List.length(Type.expected_argst_ty)inlet_,body=T.open_funtinmatchposwith|[]->letbody=expand_otf_bodyingfpf_root~depth:(depth+exp_args_num)body|i::is->lethd,args=unfoldbodyinifif_and_orbodythenIgnoreelse(letargs=List.filter(funx->not@@T.is_typex)argsin(* if we are sampling something of variable type, it might eta-expand *)ifT.is_varhd||(Type.is_var(snd(Type.open_fun(Term.tybody))))thenBelse(letnum_acutal_args=List.lengthargsinletextra_args,_=Type.open_fun(T.tybody)in(* arguments for eta-expansion *)ifnum_acutal_args>=ithen(letarg=T.DB.shift(List.lengthextra_args)(List.nthargs(i-1))ingfpf~depth:(depth+exp_args_num)isarg)elseifnum_acutal_args+(List.lengthextra_args)>=ithen(letexp_arg_idx=i-num_acutal_argsinletdb_ty=List.nthextra_args(exp_arg_idx-1)inletarg=T.bvar(List.lengthextra_args-exp_arg_idx)~ty:db_tyingfpf~depth:(depth+exp_args_num)isarg)elseN))(* A *)andgfpf_root~deptht=matchT.viewtwith|T.AppBuiltin(b,_)->BIb(* if we are sampling under a function, it can happen that there are
loosely bound variables that can be unified inside LambdaSup. *)|T.DBi->if(i<depth)thenDBielseIgnore|T.Var_->A|T.Constc->Sc|T.App(hd,_)->(matchT.viewhdwithT.Var_->A|T.Consts->Ss|T.DBi->if(i<depth)thenDBielseIgnore|T.AppBuiltin(b,_)->BIb|_->assertfalse)|T.Fun(_,_)->assertfalse(* TODO more efficient way to compute a vector of s: if the fingerprint
is in BFS, compute features during only one traversal of the term? *)letpp_featureout=function|A->CCFormat.fprintfout"A"|B->CCFormat.fprintfout"B"|DBi->CCFormat.fprintfout"DB %d"i|N->CCFormat.fprintfout"N"|Sid->CCFormat.fprintfout"S %a"ID.ppid|BIb->CCFormat.fprintfout"BI %a"Builtin.ppb|Ignore->CCFormat.fprintfout"I"(** compute a feature vector for some positions *)letfppositions=(* list of fingerprint feature functions *)letfpfs=List.mapgfpfpositionsinfunt->List.map(funfpf->fpft)fpfs(* Format.printf "@[Fingerprinting:@ @[%a@]=@[%a@].@]\n" T.pp t (CCList.pp pp_feature) res; *)(** {2 Fingerprint functions} *)letfp3d=fp[[];[1];[1;1]]letfp3w=fp[[];[1];[2]]letfp4d=fp[[];[1];[1;1;];[1;1;1]]letfp4m=fp[[];[1];[2];[1;1]]letfp4w=fp[[];[1];[2];[3]]letfp5m=fp[[];[1];[2];[3];[1;1]]letfp6m=fp[[];[1];[2];[3];[1;1];[1;2]]letfp7=fp[[];[1];[2];[1;1];[1;2];[2;1];[2;2]]letfp7m=fp[[];[1];[2];[3];[1;1];[4];[1;2]]letfp16=fp[[];[1];[2];[3];[4];[1;1];[1;2];[1;3];[2;1];[2;2];[2;3];[3;1];[3;2];[3;3];[1;1;1];[2;1;1]](** {2 Index construction} *)letfeat_to_int_=function|A->0|B->1|S_->2|N->3|DB_->4|BI_->5|Ignore->6letcmp_featuref1f2=matchf1,f2with|A,A|B,B|N,N|Ignore,Ignore->0|Ss1,Ss2->ID.compares1s2|BIb1,BIb2->Builtin.compareb1b2|DBi,DBj->compareij|_->feat_to_int_f1-feat_to_int_f2(** check whether two features are compatible for unification. *)letcompatible_features_uniff1f2=matchf1with|Ss1->(matchf2with|Ss2->ID.equals1s2|A|B|Ignore->true|N|DB_|BI_->false)|BIb1->(matchf2with|BIb2->Builtin.equalb1b2|A|B|Ignore->true|N|DB_|S_->false)|Ignore->true|B->true|A->(matchf2with|N->false|DB_|Ignore|S_|A|B|BI_->true)|DBi->(matchf2with|DBj->i=j|B|A|Ignore->true|S_|N|BI_->false)|N->(matchf2with|N|B|Ignore->true|A|DB_|S_|BI_->false)(** check whether two features are compatible for matching. *)letcompatible_features_matchf1f2=matchf1with|Ss1->(matchf2with|Ss2->ID.equals1s2|Ignore->true|_->false)|BIb1->(matchf2with|BIb2->Builtin.equalb1b2|Ignore->true|_->false)|Ignore|B->true|A->(matchf2with|A|DB_|S_|Ignore|BI_->true|_->false)|DBi->(matchf2with|DBj->i=j|Ignore->true|_->false)|N->(matchf2with|N|Ignore->true|_->false)(** Map whose keys are features *)moduleFeatureMap=Map.Make(structtypet=featureletcompare=cmp_featureend)moduleMake(X:Set.OrderedType)=structtypeelt=X.tmoduleLeaf=Index.MakeLeaf(X)typet={trie:trie;fp:fingerprint_fun;}andtrie=|Empty|NodeoftrieFeatureMap.t|LeafofLeaf.t(** The index *)letdefault_fp=fp7mletempty()={trie=Empty;fp=default_fp;}letempty_withfp={trie=Empty;fp;}letget_fingerprintidx=idx.fpletname="fingerprint_idx"letis_emptyidx=letrecis_emptytrie=matchtriewith|Empty->true|Leafl->Leaf.is_emptyl|Nodemap->FeatureMap.for_all(fun_trie'->is_emptytrie')mapinis_emptyidx.trie(** add t -> data to the trie *)letaddidxtdata=(* recursive insertion *)letrecrecursetriefeatures=matchtrie,featureswith|Empty,[]->letleaf=Leaf.emptyinletleaf=Leaf.addleaftdatainLeafleaf(* creation of new leaf *)|Empty,f::features'->letsubtrie=recurseEmptyfeatures'inletmap=FeatureMap.addfsubtrieFeatureMap.emptyinNodemap(* index new subtrie by feature *)|Nodemap,f::features'->letsubtrie=tryFeatureMap.findfmapwithNot_found->Emptyin(* insert in subtrie *)letsubtrie=recursesubtriefeatures'inletmap=FeatureMap.addfsubtriemapinNodemap(* point to new subtrie *)|Leafleaf,[]->letleaf=Leaf.addleaftdatainLeafleaf(* addition to set *)|Node_,[]|Leaf_,_::_->failwith"different feature length in fingerprint trie"inletfeatures=idx.fptin(* features of term *){idxwithtrie=recurseidx.triefeatures;}letadd_trie=CCFun.uncurry(addtrie)letadd_seq=Iter.foldadd_letadd_list=List.fold_leftadd_(** remove t -> data from the trie *)letremove_ifidxtleaf_cleaner=(* recursive deletion *)letrecrecursetriefeatures=matchtrie,featureswith|Empty,[]|Empty,_::_->Empty(* keep it empty *)|Nodemap,f::features'->letmap=(* delete from subtrie, if there is a subtrie *)tryletsubtrie=FeatureMap.findfmapinletsubtrie=recursesubtriefeatures'inifsubtrie=EmptythenFeatureMap.removefmapelseFeatureMap.addfsubtriemapwithNot_found->mapin(* if the map is empty, use Empty *)ifFeatureMap.is_emptymapthenEmptyelseNodemap|Leafleaf,[]->letleaf=leaf_cleanerleaftinifLeaf.is_emptyleafthenEmptyelseLeafleaf|Node_,[]|Leaf_,_::_->failwith"different feature length in fingerprint trie"inletfeatures=idx.fptin(* features of term *){idxwithtrie=recurseidx.triefeatures;}letremoveidxtdata=remove_ifidxt(funleaft->Leaf.removeleaftdata)letupdate_leafidxtdata_filter=remove_ifidxt(funleaft->Leaf.update_leafleaftdata_filter)letremove_trie=CCFun.uncurry(removetrie)letremove_seqdtseq=Iter.foldremove_dtseqletremove_listdtseq=List.fold_leftremove_dtseqletiteridxf=letrecitertrief=matchtriewith|Empty->()|Nodemap->FeatureMap.iter(fun_subtrie->itersubtrief)map|Leafleaf->Leaf.iterleaffiniteridx.triefletpp_keysidx=CCFormat.printf"keys@.";iteridx(funt_->CCFormat.printf"@[%a@],"T.ppt);CCFormat.printf"@."letfoldidxfacc=letrecfoldtriefacc=matchtriewith|Empty->acc|Nodemap->FeatureMap.fold(fun_subtrieacc->foldsubtriefacc)mapacc|Leafleaf->Leaf.foldleafaccfinfoldidx.triefacc(** number of indexed terms *)letsizeidx=letn=ref0initeridx(fun__->incrn);!n(** fold on parts of the trie that are compatible with features *)lettraverse~compatibleidxfeaturesk=let_span=ZProf.enter_profprof_traversein(* fold on the trie *)letrecrecursetriefeatures=matchtrie,featureswith|Empty,_->()|Leafleaf,[]->kleaf(* give the leaf to [k] *)|Nodemap,f::features'->(* fold on any subtrie that is compatible with current feature *)FeatureMap.iter(funf'subtrie->ifcompatibleff'thenrecursesubtriefeatures')map|Node_,[]|Leaf_,_::_->failwith"different feature length in fingerprint trie"intryrecurseidx.triefeatures;ZProf.exit_prof_span;withe->ZProf.exit_prof_span;raiseeletretrieve_unifiables_auxfold_unify(idx,sc_idx)tk=letfeatures=idx.fp(fstt)inletcompatible=compatible_features_unifintraverse~compatibleidxfeatures(funleaf->fold_unify(leaf,sc_idx)tk)letretrieve_unifiables=retrieve_unifiables_aux(Leaf.fold_unify)letretrieve_unifiables_complete?(unif_alg=JP_unif.unify_scoped)=retrieve_unifiables_aux(Leaf.fold_unify_complete~unif_alg)letretrieve_generalizations?(subst=S.empty)(idx,sc_idx)tk=letfeatures=idx.fp(fstt)in(* compatible t1 t2 if t2 can match t1 *)letcompatiblef1f2=compatible_features_matchf2f1intraverse~compatibleidxfeatures(funleaf->Leaf.fold_match~subst(leaf,sc_idx)tk)letretrieve_specializations?(subst=S.empty)(idx,sc_idx)tk=letfeatures=idx.fp(fstt)inletcompatible=compatible_features_matchintraverse~compatibleidxfeatures(funleaf->Leaf.fold_matched~subst(leaf,sc_idx)tk)letto_dot__=failwith"Fingerprint: to_dot not implemented"end