# 1 "src/owl/nlp/owl_nlp_similarity.ml"
(*
 * OWL - OCaml Scientific Computing
 * Copyright (c) 2016-2022 Liang Wang <liang@ocaml.xyz>
 *)

(* Distance metrics selectable through [distance]. *)
type t =
  | Cosine
  | Euclidean
  | KL_D

(* Human-readable name of a metric. *)
let to_string = function
  | Cosine -> "Cosine"
  | Euclidean -> "Euclidean"
  | KL_D -> "Kullback–Leibler divergence"

(* Placeholder: ignores both arguments and always returns [0.]. *)
let kl_distance _ _ = 0.

(* [inner_product x y] sums [xv *. yv] over every key that occurs in both
   sparse vectors. Each vector is an array of [(key, value)] pairs in any
   order. Keys are matched with the polymorphic hash table, so a key that is
   repeated in [y] contributes only its last value. *)
let inner_product x y =
  let hy = Hashtbl.create (Array.length y) in
  Array.iter (fun (k, v) -> Hashtbl.add hy k v) y;
  Array.fold_left
    (fun acc (k, v) ->
      (* one lookup per key instead of [mem] followed by [find] *)
      match Hashtbl.find_opt hy k with
      | Some w -> acc +. (v *. w)
      | None -> acc)
    0.
    x

(* [cosine_distance x y] is the negated inner product of [x] and [y]; the
   sign is flipped because a high similarity must map to a small distance.
   No division by the vector norms happens here.
   NOTE(review): this equals a true cosine distance only when both inputs
   are already unit-length; confirm that callers normalise. *)
let cosine_distance x y = -.(inner_product x y)

(* [inner_product_fast x y] computes the same sum as [inner_product] with a
   linear merge and no hash table. It assumes that the keys of both arrays
   are already sorted in increasing order; nothing is sorted here, the
   caller has to do it, for example with
     Array.sort (fun a b -> Stdlib.compare (fst a) (fst b)) x

   Keys are compared once per step with [compare], which is a total order:
   exactly one of the three branches is taken, so at least one cursor always
   moves and the merge terminates for every key type. Chained [=], [<], [>]
   tests can all be false (a [nan] key), which would stall both cursors. *)
let inner_product_fast x y =
  let xn = Array.length x in
  let yn = Array.length y in
  let rec merge xi yi acc =
    if xi >= xn || yi >= yn
    then acc
    else (
      let xk, xv = x.(xi) in
      let yk, yv = y.(yi) in
      let c = compare xk yk in
      if c = 0
      then merge (xi + 1) (yi + 1) (acc +. (xv *. yv))
      else if c < 0
      then merge (xi + 1) yi acc
      else merge xi (yi + 1) acc)
  in
  merge 0 0 0.

(* [euclidean_distance x y] is the L2 norm of the difference of two sparse
   vectors given as arrays of [(key, value)] pairs. A key missing from one
   side counts as [0.] on that side. *)
let euclidean_distance x y =
  let h = Hashtbl.create (Array.length x) in
  Array.iter (fun (k, a) -> Hashtbl.add h k a) x;
  Array.iter
    (fun (k, b) ->
      match Hashtbl.find_opt h k with
      | Some a -> Hashtbl.replace h k (a -. b)
      (* key only in [y]: store [b] itself, the sign vanishes once squared *)
      | None -> Hashtbl.add h k b)
    y;
  let z = ref 0. in
  Hashtbl.iter (fun _ v -> z := !z +. (v *. v)) h;
  sqrt !z

(* [distance m] returns the distance function that implements metric [m]. *)
let distance = function
  | Cosine -> cosine_distance
  | Euclidean -> euclidean_distance
  | KL_D -> kl_distance

(* ends here *)