(********************************************************************************)
(* Word.ml
Copyright (c) 2018 Dario Teixeira <dario.teixeira@nleyten.com>
This software is distributed under the terms of the ISC license.
See LICENSE file for full license text.
*)
(********************************************************************************)

open Prelude

(********************************************************************************)
(** {1 Module types} *)
(********************************************************************************)

(** Description of a raw word list consumed by the [Make] functor. *)
module type SOURCE =
sig
    (** Name of the word category. *)
    val name: string

    (** Number of words the list is expected to contain. *)
    val count: int

    (** Raw word list: one word per line.  Empty lines and lines whose first
        character is [#] are skipped when the list is loaded. *)
    val data: string

    (** Function applied to every word before it is stored. *)
    val transform: string -> string
end

(** Interface of a word module produced by the [Make] functor. *)
module type S =
sig
    type t

    (** Name of the word category, copied from the source. *)
    val name: string

    (** Declared number of words, copied from the source. *)
    val count: int

    (** Converts an integer into a word handle.  No range check is made. *)
    val of_int: int -> t

    (** Converts a word handle back into its integer. *)
    val to_int: t -> int

    (** Looks up a full word; [None] if the lookup finds nothing. *)
    val of_string: string -> t option

    (** Returns the full word for a handle. *)
    val to_string: t -> string

    (** Looks up a word by its abbreviation; [None] if the lookup finds nothing
        or if the argument is longer than an abbreviation can be. *)
    val of_abbr_string: string -> t option

    (** Returns the abbreviated form of a word. *)
    val to_abbr_string: t -> string

    (** [suggest ~max_distance str] returns a pair.  The first component is the
        word found by looking up the abbreviation of [str], if any.  The second
        is an array with [max_distance + 1] elements in which element [d] lists
        the words whose edit distance to [str] is exactly [d]. *)
    val suggest: max_distance:int -> string -> string option * string list array

    include TESTABLE with type t := t
end

(********************************************************************************)
(** {1 Functors} *)
(********************************************************************************)

(** Builds a word module from a raw word list.  A word handle is an index into
    the sorted vector of transformed words. *)
module Make (Source: SOURCE): S =
struct
    type t = int

    let name = Source.name
    let count = Source.count

    (* Maximum number of characters in an abbreviated word. *)
    let prefix_len = 4

    (* Sorted vector of transformed words, built once when the functor is applied.
       Raises [Failure] if the number of usable lines in [Source.data] differs
       from [Source.count].  The check is explicit rather than an [assert] so
       that it is still performed when assertions are compiled out. *)
    let wordvec =
        let len = String.length Source.data in
        let vec = Array.make Source.count "" in
        let rec loop idx start =
            if start > len
            then begin
                if idx <> Source.count
                then failwith
                    ("Word.Make: the " ^ Source.name ^ " word list has "
                    ^ string_of_int idx ^ " entries, but "
                    ^ string_of_int Source.count ^ " were declared")
            end
            else
                (* The current line runs up to the next newline or to the end of data. *)
                let finish = match String.index_from_opt Source.data start '\n' with
                    | Some x -> x
                    | None   -> len in
                let word = String.sub Source.data start (finish - start) in
                let idx' =
                    if String.length word > 0 && word.[0] <> '#'
                    then begin
                        (* Surplus words are counted but not stored, so that the
                           final count check can report the mismatch. *)
                        (if idx < Source.count then vec.(idx) <- Source.transform word);
                        idx + 1
                    end
                    else
                        idx in
                loop idx' (finish + 1) in
        loop 0 0;
        Array.sort String.compare vec;
        vec

    (* Compares two strings on at most their first [prefix_len] characters.
       If one string ends inside that prefix while the other continues,
       the shorter string orders first. *)
    let prefix_compare a b =
        let len_a = String.length a in
        let len_b = String.length b in
        let rec loop i =
            if i >= prefix_len
            then 0
            else match (i < len_a, i < len_b) with
                | (true, true) ->
                    let cmp = Char.compare a.[i] b.[i] in
                    if cmp = 0
                    then loop (i + 1)
                    else cmp
                | (true, false) -> 1
                | (false, true) -> -1
                | (false, false) -> 0 in
        loop 0

    let of_int x = x
    let to_int x = x

    (* NOTE(review): [Array.binary_search] comes from Prelude; it is presumed to
       return the index of a matching element, if any.  Confirm against Prelude. *)
    let of_string str = Array.binary_search String.compare str wordvec

    (* Raises [Invalid_argument] if [x] is not a valid index into the vector. *)
    let to_string x = wordvec.(x)

    (* Strings longer than [prefix_len] are rejected outright.  A string shorter
       than [prefix_len] compares equal only to a word identical to it. *)
    let of_abbr_string str =
        if String.length str > prefix_len
        then None
        else Array.binary_search prefix_compare str wordvec

    (* The abbreviation is the first [prefix_len] characters of the word,
       or the whole word when it is not longer than that. *)
    let to_abbr_string x =
        let str = wordvec.(x) in
        if String.length str > prefix_len
        then String.sub str 0 prefix_len
        else str

    (* See the documentation of [suggest] in module type [S].  Words are prepended
       to their bucket while the vector is traversed front to back, so each bucket
       lists its words in the reverse of vector order.
       NOTE(review): [String.edit_distance] comes from Prelude; the exact metric
       is not visible here. *)
    let suggest ~max_distance str =
        let len = String.length str in
        let str_abbr =
            if len > prefix_len
            then String.sub str 0 prefix_len
            else str in
        let same_prefix = match of_abbr_string str_abbr with
            | Some x -> Some (to_string x)
            | None   -> None in
        let suggestions = Array.make (max_distance + 1) [] in
        let f word =
            let distance = String.edit_distance str word in
            if distance <= max_distance
            then suggestions.(distance) <- word :: suggestions.(distance) in
        Array.iter f wordvec;
        (same_prefix, suggestions)

    let pp = Format.pp_print_int
    let equal = (=)
end

(********************************************************************************)
(** {1 Modules encapsulating word lists} *)
(********************************************************************************)

(** Adjectives: 2048 words, stored as they appear in the resource file. *)
module Adjective = Make
(struct
    let name = "adjective"
    let count = 2048
    let data = [%blob "../resources/adjectives.txt"]
    let transform x = x
end)

(** Locations: 1024 words, stored as they appear in the resource file. *)
module Location = Make
(struct
    let name = "location"
    let count = 1024
    let data = [%blob "../resources/locations.txt"]
    let transform x = x
end)

(** Nouns: 2048 words, stored as they appear in the resource file. *)
module Noun = Make
(struct
    let name = "noun"
    let count = 2048
    let data = [%blob "../resources/nouns.txt"]
    let transform x = x
end)

(** Verbs: 1024 words, each stored with a suffix chosen from its last two letters. *)
module Verb = Make
(struct
    let name = "verb"
    let count = 1024
    let data = [%blob "../resources/verbs.txt"]

    (* Appends a suffix selected by the last two characters of the word:
       - ending in ss, x, z, sh or ch: append es;
       - vowel followed by s: append ses;
       - non-vowel followed by o: append es;
       - non-vowel followed by y: drop the y and append ies;
       - anything else: append s.
       Raises [Invalid_argument] for words shorter than two characters, since
       the rules need both of the last two characters. *)
    let transform x =
        let is_vowel = function 'a' | 'e' | 'i' | 'o' | 'u' -> true | _ -> false in
        let len = String.length x in
        if len < 2 then invalid_arg ("Verb.transform: word too short: " ^ x);
        let penult = x.[len - 2] in
        let ult = x.[len - 1] in
        match (penult, ult) with
            | ('s', 's') | (_, 'x') | (_, 'z') | ('s', 'h') | ('c', 'h') -> x ^ "es"
            | (v, 's') when is_vowel v -> x ^ "ses"
            | (c, 'o') when not (is_vowel c) -> x ^ "es"
            | (c, 'y') when not (is_vowel c) -> String.sub x 0 (len - 1) ^ "ies"
            | _ -> x ^ "s"
end)