123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297(* This file is free software, part of Zipperposition. See file "license" for more details. *)(** {1 Fingerprint term indexing} *)moduleT=TermmoduleI=IndexmoduleS=Substletprof_traverse=Util.mk_profiler"fingerprint.traverse"(* a feature.
A = variable
B = below variable
NonFO = Builtin/nonFO (non-syntactically unifiable)
N = invalid position
S = symbol
*)typefeature=A|B|NonFO|N|SofID.t(* a fingerprint function, it computes several features of a term *)typefingerprint_fun=T.t->featurelist(* TODO: use a feature array, rather than a list *)(* TODO: more efficient implem of traversal, only following branches that
are useful instead of folding and filtering *)(* compute a feature for a given position *)letrecgfpfpost=matchpos,T.Classic.viewtwith|[],T.Classic.Var_->A|[],T.Classic.DB_->B|[],_whennot(Unif.Ty.type_is_unifiable@@T.tyt)||Type.is_fun(T.tyt)->NonFO|[],T.Classic.App(s,_)->Ss|i::pos',T.Classic.App(_,l)->begintrygfpfpos'(List.nthli)(* recurse in subterm *)withFailure_->N(* not a position in t *)end|_::_,T.Classic.DB_->N|_::_,T.Classic.Var_->B(* under variable *)|_,T.Classic.AppBuiltin_|_,T.Classic.NonFO->NonFO(* don't filter! *)(* TODO more efficient way to compute a vector of features: if the fingerprint
is in BFS, compute features during only one traversal of the term? *)(** compute a feature vector for some positions *)letfppositions=(* list of fingerprint feature functions *)letfpfs=List.map(funpos->gfpfpos)positionsinfunt->List.map(funfpf->fpft)fpfs(** {2 Fingerprint functions} *)letfp3d=fp[[];[1];[1;1]]letfp3w=fp[[];[1];[2]]letfp4d=fp[[];[1];[1;1;];[1;1;1]]letfp4m=fp[[];[1];[2];[1;1]]letfp4w=fp[[];[1];[2];[3]]letfp5m=fp[[];[1];[2];[3];[1;1]]letfp6m=fp[[];[1];[2];[3];[1;1];[1;2]]letfp7=fp[[];[1];[2];[1;1];[1;2];[2;1];[2;2]]letfp7m=fp[[];[1];[2];[3];[1;1];[4];[1;2]]letfp16=fp[[];[1];[2];[3];[4];[1;1];[1;2];[1;3];[2;1];[2;2];[2;3];[3;1];[3;2];[3;3];[1;1;1];[2;1;1]](** {2 Index construction} *)letfeat_to_int_=function|A->0|B->1|S_->2|N->3|NonFO->4letcmp_featuref1f2=matchf1,f2with|A,A|B,B|N,N|NonFO,NonFO->0|Ss1,Ss2->ID.compares1s2|_->feat_to_int_f1-feat_to_int_f2(** check whether two features are compatible for unification. *)letcompatible_features_uniff1f2=matchf1,f2with|Ss1,Ss2->ID.equals1s2|NonFO,_|_,NonFO|B,_|_,B->true|A,N|N,A->false|A,_|_,A->true|N,S_|S_,N->false|N,N->true(** check whether two features are compatible for matching. *)letcompatible_features_matchf1f2=matchf1,f2with|Ss1,Ss2->ID.equals1s2|NonFO,_|_,NonFO|B,_->true|N,N->true|N,_->false|_,N->false|A,B->false|A,_->true|S_,_->false(** Map whose keys are features *)moduleFeatureMap=Map.Make(structtypet=featureletcompare=cmp_featureend)moduleMake(X:Set.OrderedType)=structtypeelt=X.tmoduleLeaf=Index.MakeLeaf(X)typet={trie:trie;fp:fingerprint_fun;}andtrie=|Empty|NodeoftrieFeatureMap.t|LeafofLeaf.t(** The index *)letdefault_fp=fp7mletempty()={trie=Empty;fp=default_fp;}letempty_withfp={trie=Empty;fp;}letget_fingerprintidx=idx.fpletname="fingerprint_idx"letis_emptyidx=letrecis_emptytrie=matchtriewith|Empty->true|Leafl->Leaf.is_emptyl|Nodemap->FeatureMap.for_all(fun_trie'->is_emptytrie')mapinis_emptyidx.trie(** add t -> data to the trie *)letaddidxtdata=(* recursive insertion *)letrecrecursetriefeatures=matchtrie,featureswith|Empty,[]->letleaf=Leaf.emptyinletleaf=Leaf.addleaftdatainLeafleaf(* creation of new leaf *)|Empty,f::features'->letsubtrie=recurseEmptyfeatures'inletmap=FeatureMap.addfsubtrieFeatureMap.emptyinNodemap(* index new subtrie by feature *)|Nodemap,f::features'->letsubtrie=tryFeatureMap.findfmapwithNot_found->Emptyin(* insert in subtrie *)letsubtrie=recursesubtriefeatures'inletmap=FeatureMap.addfsubtriemapinNodemap(* point to new subtrie *)|Leafleaf,[]->letleaf=Leaf.addleaftdatainLeafleaf(* addition to set *)|Node_,[]|Leaf_,_::_->failwith"different feature length in fingerprint trie"inletfeatures=idx.fptin(* features of term *){idxwithtrie=recurseidx.triefeatures;}letadd_trie=CCFun.uncurry(addtrie)letadd_seq=Iter.foldadd_letadd_list=List.fold_leftadd_(** remove t -> data from the trie *)letremoveidxtdata=(* recursive deletion *)letrecrecursetriefeatures=matchtrie,featureswith|Empty,[]|Empty,_::_->Empty(* keep it empty *)|Nodemap,f::features'->letmap=(* delete from subtrie, if there is a subtrie *)tryletsubtrie=FeatureMap.findfmapinletsubtrie=recursesubtriefeatures'inifsubtrie=EmptythenFeatureMap.removefmapelseFeatureMap.addfsubtriemapwithNot_found->mapin(* if the map is empty, use Empty *)ifFeatureMap.is_emptymapthenEmptyelseNodemap|Leafleaf,[]->letleaf=Leaf.removeleaftdatainifLeaf.is_emptyleafthenEmptyelseLeafleaf|Node_,[]|Leaf_,_::_->failwith"different feature length in fingerprint trie"inletfeatures=idx.fptin(* features of term *){idxwithtrie=recurseidx.triefeatures;}letremove_trie=CCFun.uncurry(removetrie)letremove_seqdtseq=Iter.foldremove_dtseqletremove_listdtseq=List.fold_leftremove_dtseqletiteridxf=letrecitertrief=matchtriewith|Empty->()|Nodemap->FeatureMap.iter(fun_subtrie->itersubtrief)map|Leafleaf->Leaf.iterleaffiniteridx.triefletfoldidxfacc=letrecfoldtriefacc=matchtriewith|Empty->acc|Nodemap->FeatureMap.fold(fun_subtrieacc->foldsubtriefacc)mapacc|Leafleaf->Leaf.foldleafaccfinfoldidx.triefacc(** number of indexed terms *)letsizeidx=letn=ref0initeridx(fun__->incrn);!n(** fold on parts of the trie that are compatible with features *)lettraverse~compatibleidxfeaturesk=Util.enter_profprof_traverse;(* fold on the trie *)letrecrecursetriefeatures=matchtrie,featureswith|Empty,_->()|Leafleaf,[]->kleaf(* give the leaf to [k] *)|Nodemap,f::features'->(* fold on any subtrie that is compatible with current feature *)FeatureMap.iter(funf'subtrie->ifcompatibleff'thenrecursesubtriefeatures')map|Node_,[]|Leaf_,_::_->failwith"different feature length in fingerprint trie"intryrecurseidx.triefeatures;Util.exit_profprof_traverse;withe->Util.exit_profprof_traverse;raiseeletretrieve_unifiables?(subst=Unif_subst.empty)(idx,sc_idx)tk=letfeatures=idx.fp(fstt)inletcompatible=compatible_features_unifintraverse~compatibleidxfeatures(funleaf->Leaf.fold_unify~subst(leaf,sc_idx)tk)letretrieve_generalizations?(subst=S.empty)(idx,sc_idx)tk=letfeatures=idx.fp(fstt)in(* compatible t1 t2 if t2 can match t1 *)letcompatiblef1f2=compatible_features_matchf2f1intraverse~compatibleidxfeatures(funleaf->Leaf.fold_match~subst(leaf,sc_idx)tk)letretrieve_specializations?(subst=S.empty)(idx,sc_idx)tk=letfeatures=idx.fp(fstt)inletcompatible=compatible_features_matchintraverse~compatibleidxfeatures(funleaf->Leaf.fold_matched~subst(leaf,sc_idx)tk)letto_dot__=failwith"Fingerprint: to_dot not implemented"end