123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365(********************************************************************************)(* Phrase.ml
Copyright (c) 2018 Dario Teixeira <dario.teixeira@nleyten.com>
This software is distributed under the terms of the ISC license.
See LICENSE file for full license text.
*)(********************************************************************************)openPreludeopenWord(********************************************************************************)(** {1 Signatures} *)(********************************************************************************)moduletypeINTEGER=sigtypetvalzero:tvalone:tvalof_int:int->tvalto_int:t->intvaladd:t->t->tvalsub:t->t->tvallogand:t->t->tvallogor:t->t->tvalshift_left:t->int->tvalshift_right_logical:t->int->tend(********************************************************************************)(** {1 Definitions about the internal representation of phrases} *)(********************************************************************************)moduleInternal=structtyper32={adj:Adjective.t;noun:Noun.t;loc:Location.t;}typer64={subj_adj:Adjective.t;subj_noun:Noun.t;verb:Verb.t;obj_adj:Adjective.t;obj_noun:Noun.t;obj_loc:Location.t;}typet=|R32ofr32|R64ofr64letppfmtx=()letequal=(=)end(********************************************************************************)(** {1 Definitions about the external (user-visible) representation of phrases} *)(********************************************************************************)moduletypeEXTERNAL=sigtypettypeof_string_errorvalof_internal:Internal.t->tvalto_internal:t->Internal.tvalof_string:string->(t,of_string_error)resultvalto_string:t->stringendmoduleHexa=structopenInternaltypet=Int32ofint32|Int64ofint64typeof_string_error=[`Invalid32|`Invalid64|`Invalid_lengthofint]typeof_bytes_error=[`Invalid_lengthofint]letof_internalinternal=letaux(typeu)(moduleI:INTEGERwithtypet=u)to_hexaxs=letrecloopacc=function|[]->to_hexaacc|(i,shift)::tl->letx=I.shift_left(I.of_inti)shiftinletacc=I.addaccxinloopacctlinloopI.zeroxsinmatchinternalwith|R32x->aux(moduleInt32)(funx->Int32x)[(Adjective.to_intx.adj,0);(Noun.to_intx.noun,11);(Location.to_intx.loc,22);]|R64x->aux(moduleInt64)(funx->Int64x)[(Adjective.to_intx.subj_adj,0);(Noun.to_intx.subj_noun,11);(Verb.to_intx.verb,22);(Adjective.to_intx.obj_adj,32);(Noun.to_intx.obj_noun,43);(Location.to_intx.obj_loc,54);]letto_internalhexa=letget(typeu)(moduleI:INTEGERwithtypet=u)xfirstlen=letmask=I.(shift_left(sub(shift_leftonelen)one)first)inletv=I.(shift_right_logical(logandmaskx)first)inI.to_intvinmatchhexawith|Int32x->letget=get(moduleInt32)inletadj=getx011|>Adjective.of_intinletnoun=getx1111|>Noun.of_intinletloc=getx2210|>Location.of_intinR32{adj;noun;loc}|Int64x->letget=get(moduleInt64)inletsubj_adj=getx011|>Adjective.of_intinletsubj_noun=getx1111|>Noun.of_intinletverb=getx2210|>Verb.of_intinletobj_adj=getx3211|>Adjective.of_intinletobj_noun=getx4311|>Noun.of_intinletobj_loc=getx5410|>Location.of_intinR64{subj_adj;subj_noun;verb;obj_adj;obj_noun;obj_loc}letof_stringstr=matchString.lengthstrwith|8->beginmatchInt32.of_string_opt("0x"^str)with|Somex->Ok(Int32x)|None->Error`Invalid32end|16->beginmatchInt64.of_string_opt("0x"^str)with|Somex->Ok(Int64x)|None->Error`Invalid64end|x->Error(`Invalid_lengthx)letto_string=function|Int32x->Printf.sprintf"%08lx"x|Int64x->Printf.sprintf"%016Lx"xletof_bytesbuf=letaux(typeu)(moduleI:INTEGERwithtypet=u)len=letrecloopacci=ifi<lenthenletv=Bytes.getbuf(len-i-1)|>Char.code|>I.of_intinletmask=I.shift_leftv(8*i)inletacc=I.logoraccmaskinloopacc(i+1)elseaccinloopI.zero0inmatchBytes.lengthbufwith|4->Ok(Int32(aux(moduleInt32)4))|8->Ok(Int64(aux(moduleInt64)8))|x->Error(`Invalid_lengthx)letto_bytesx=letaux(typeu)(moduleI:INTEGERwithtypet=u)lenx=letbytes=Bytes.createleninletmask=I.of_int255infori=0tolen-1doletv=I.(shift_right_logicalx(8*i)|>logandmask|>to_int)inletb=Char.chrvinBytes.setbytes(len-i-1)bdone;bytesinmatchxwith|Int32x->aux(moduleInt32)4x|Int64x->aux(moduleInt64)8xendmoduleText=structopenInternaltypet={text:string;internal:Internal.t;}typeunknown_word={name:string;word:string;same_prefix:stringoption;suggestions:stringlistarray;}typeunknown_abbr={name:string;word:string;}typeof_string_error=[`Unknown_wordsofunknown_wordlist|`Parsing_errorofstring]typeof_abbr_error=[`Unknown_abbrsofunknown_abbrlist|`Parsing_errorofstring]letsentence32abc=Printf.sprintf"%s %s from %s"(String.capitalize_asciia)b(String.capitalize_asciic)letsentence64abcdef=Printf.sprintf"%s %s %s %s %s from %s"(String.capitalize_asciia)bcde(String.capitalize_asciif)letof_internalinternal=matchinternalwith|R32x->leta=Adjective.to_stringx.adjinletb=Noun.to_stringx.nouninletc=Location.to_stringx.locinlettext=sentence32abcin{text;internal}|R64x->leta=Adjective.to_stringx.subj_adjinletb=Noun.to_stringx.subj_nouninletc=Verb.to_stringx.verbinletd=Adjective.to_stringx.obj_adjinlete=Noun.to_stringx.obj_nouninletf=Location.to_stringx.obj_locinlettext=sentence64abcdefin{text;internal}letto_internalx=x.internalletsplit_wordsstr=str|>String.lowercase_ascii|>String.split_on_char' '|>List.filter((<>)"")letof_stringstr=letprocess(typeu)(moduleW:Word.Swithtypet=u)errorsword=matchW.of_stringwordwith|Somex->(Somex,errors)|None->let(same_prefix,suggestions)=W.suggest~max_distance:2wordinleterror={name=W.name;word;same_prefix;suggestions}in(None,error::errors)inletrecextractaccactionswords=match(actions,words)with|([],[])->Someacc|([],_)->None|(`Word::_,[])->None|(`Word::xtl,y::ytl)->extract(y::acc)xtlytl|(`Opt_::xtl,[])->extractaccxtl[]|(`Optx::xtl,y::ytl)whenx=y->extractaccxtlytl|(`Optx::xtl,ys)->extractaccxtlysinletwords=split_wordsstrinifList.lengthwords<=5thenmatchextract[][`Opt"the";`Word;`Word;`Opt"from";`Word]wordswith|Some[c;b;a]->leterrors=[]inlet(a',errors)=process(moduleAdjective)errorsainlet(b',errors)=process(moduleNoun)errorsbinlet(c',errors)=process(moduleLocation)errorscinbeginmatch(a',b',c',errors)with|(Someadj,Somenoun,Someloc,_)->letinternal=R32{adj;noun;loc}inlettext=sentence32abcinOk{internal;text}|(_,_,_,errors)->Error(`Unknown_wordserrors)end|_->Error(`Parsing_errorstr)elsematchextract[][`Opt"the";`Word;`Word;`Word;`Opt"the";`Word;`Word;`Opt"from";`Word]wordswith|Some[f;e;d;c;b;a]->leterrors=[]inlet(a',errors)=process(moduleAdjective)errorsainlet(b',errors)=process(moduleNoun)errorsbinlet(c',errors)=process(moduleVerb)errorscinlet(d',errors)=process(moduleAdjective)errorsdinlet(e',errors)=process(moduleNoun)errorseinlet(f',errors)=process(moduleLocation)errorsfinbeginmatch(a',b',c',d',e',f',errors)with|(Somesubj_adj,Somesubj_noun,Someverb,Someobj_adj,Someobj_noun,Someobj_loc,_)->letinternal=R64{subj_adj;subj_noun;verb;obj_adj;obj_noun;obj_loc}inlettext=sentence64abcdefinOk{internal;text}|(_,_,_,_,_,_,errors)->Error(`Unknown_wordserrors)end|_->Error(`Parsing_errorstr)letto_stringx=x.textletof_abbr_stringstr=letprocess(typeu)(moduleW:Word.Swithtypet=u)errorsword=matchW.of_abbr_stringwordwith|Somex->(Somex,errors)|None->(None,{name=W.name;word}::errors)inmatchsplit_wordsstrwith|[a;b;c]->leterrors=[]inlet(a',errors)=process(moduleAdjective)errorsainlet(b',errors)=process(moduleNoun)errorsbinlet(c',errors)=process(moduleLocation)errorscinbeginmatch(a',b',c',errors)with|(Someadj,Somenoun,Someloc,_)->letinternal=R32{adj;noun;loc}inletadj'=Adjective.to_stringadjinletnoun'=Noun.to_stringnouninletloc'=Location.to_stringlocinlettext=sentence32adj'noun'loc'inOk{internal;text}|(_,_,_,errors)->Error(`Unknown_abbrserrors)end|[a;b;c;d;e;f]->leterrors=[]inlet(a',errors)=process(moduleAdjective)errorsainlet(b',errors)=process(moduleNoun)errorsbinlet(c',errors)=process(moduleVerb)errorscinlet(d',errors)=process(moduleAdjective)errorsdinlet(e',errors)=process(moduleNoun)errorseinlet(f',errors)=process(moduleLocation)errorsfinbeginmatch(a',b',c',d',e',f',errors)with|(Somesubj_adj,Somesubj_noun,Someverb,Someobj_adj,Someobj_noun,Someobj_loc,_)->letinternal=R64{subj_adj;subj_noun;verb;obj_adj;obj_noun;obj_loc}inletsubj_adj'=Adjective.to_stringsubj_adjinletsubj_noun'=Noun.to_stringsubj_nouninletverb'=Verb.to_stringverbinletobj_adj'=Adjective.to_stringobj_adjinletobj_noun'=Noun.to_stringobj_nouninletobj_loc'=Location.to_stringobj_locinlettext=sentence64subj_adj'subj_noun'verb'obj_adj'obj_noun'obj_loc'inOk{internal;text}|(_,_,_,_,_,_,errors)->Error(`Unknown_abbrserrors)end|_->Error(`Parsing_errorstr)letto_abbr_stringx=matchx.internalwith|R32x->leta=Adjective.to_abbr_stringx.adjinletb=Noun.to_abbr_stringx.nouninletc=Location.to_abbr_stringx.locinPrintf.sprintf"%s %s %s"abc|R64x->leta=Adjective.to_abbr_stringx.subj_adjinletb=Noun.to_abbr_stringx.subj_nouninletc=Verb.to_abbr_stringx.verbinletd=Adjective.to_abbr_stringx.obj_adjinlete=Noun.to_abbr_stringx.obj_nouninletf=Location.to_abbr_stringx.obj_locinPrintf.sprintf"%s %s %s %s %s %s"abcdefend