(*
 * OWL - an OCaml numerical library for scientific computing
 * Copyright (c) 2016-2018 Liang Wang <liang.wang@cl.cam.ac.uk>
 *)

(** Similarity / distance measures over sparse vectors.

    Every function below takes a sparse vector as an array of
    [(id, weight)] pairs, where [weight] is a float. *)

(** The distance metrics that {!distance} can dispatch on. *)
type t =
  | Cosine
  | Euclidean
  | KL_D

(** [to_string metric] returns a human-readable name for [metric]. *)
let to_string = function
  | Cosine -> "Cosine"
  | Euclidean -> "Euclidean"
  | KL_D -> "Kullback–Leibler divergence"

(** Placeholder for the Kullback-Leibler divergence: both arguments are
    ignored and [0.] is always returned. *)
let kl_distance _ _ = 0.

(** [inner_product x y] sums [xv *. yv] over every id of [x] that also
    occurs in [y]. The ids need not be sorted: [y] is indexed in a hash
    table first, then [x] is scanned from left to right. *)
let inner_product x y =
  let weights = Hashtbl.create (Array.length y) in
  Array.iter (fun (id, w) -> Hashtbl.add weights id w) y;
  Array.fold_left
    (fun acc (id, w) ->
      (* a single lookup both tests membership and fetches the weight *)
      match Hashtbl.find_opt weights id with
      | Some w' -> acc +. (w *. w')
      | None -> acc)
    0. x

(** [cosine_distance x y] is the negated {!inner_product} of [x] and [y].
    No normalisation is performed here, so the value is a true cosine
    measure only if the inputs are already unit-length. *)
let cosine_distance x y =
  (* return the negative since high similarity indicates small distance *)
  -.(inner_product x y)

(** [inner_product_fast x y] computes the same sum as {!inner_product} with
    a single merge pass and no hash table.

    This function assumes that the elements' ids have been sorted in
    increasing order in both vectors; the inputs are not sorted here.
    A caller holding unsorted vectors can sort them beforehand with
    [Array.sort (fun a b -> compare (fst a) (fst b))]. *)
let inner_product_fast x y =
  let x_len = Array.length x in
  let y_len = Array.length y in
  let rec merge i j acc =
    if i >= x_len || j >= y_len then acc
    else
      let xk, xv = x.(i) in
      let yk, yv = y.(j) in
      if xk = yk then merge (i + 1) (j + 1) (acc +. (xv *. yv))
      else if xk < yk then merge (i + 1) j acc
      else
        (* Unconditional last branch: a cursor always advances, even for
           ids that compare as neither smaller, equal nor greater (nan),
           so the scan is guaranteed to terminate. *)
        merge i (j + 1) acc
  in
  merge 0 0 0.

(** [euclidean_distance x y] is the square root of the sum of squared
    per-id differences between [x] and [y]. An id present in only one of
    the two vectors contributes its own weight squared. *)
let euclidean_distance x y =
  let diff = Hashtbl.create (Array.length x) in
  Array.iter (fun (id, a) -> Hashtbl.add diff id a) x;
  Array.iter
    (fun (id, b) ->
      match Hashtbl.find_opt diff id with
      | Some a -> Hashtbl.replace diff id (a -. b)
      (* id only in [y]: the sign is irrelevant once the value is squared *)
      | None -> Hashtbl.add diff id b)
    y;
  sqrt (Hashtbl.fold (fun _ d acc -> acc +. (d *. d)) diff 0.)

(** [distance metric] returns the distance function implementing
    [metric]. *)
let distance = function
  | Cosine -> cosine_distance
  | Euclidean -> euclidean_distance
  | KL_D -> kl_distance

(* ends here *)