123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224(* This file is free software. See file "license" for more details. *)(** {1 Very Simple Parser Combinators} *)openCCShims_(*$inject
module T = struct
type tree = L of int | N of tree * tree
end
open T
let mk_leaf x = L x
let mk_node x y = N(x,y)
let ptree = fix @@ fun self ->
skip_space *>
( (char '(' *> (pure mk_node <*> self <*> self) <* char ')')
<|>
(U.int >|= mk_leaf) )
let ptree' = fix_memo @@ fun self ->
skip_space *>
( (char '(' *> (pure mk_node <*> self <*> self) <* char ')')
<|>
(U.int >|= mk_leaf) )
let rec pptree = function
| N (a,b) -> Printf.sprintf "N (%s, %s)" (pptree a) (pptree b)
| L x -> Printf.sprintf "L %d" x
let errpp pp = function
| Ok x -> "Ok " ^ pp x
| Error s -> "Error " ^ s
let errpptree = errpp pptree
let erreq eq x y = match x, y with
| Ok x, Ok y -> eq x y
| Error _ , Error _ -> true
| _ -> false ;;
*)(*$= & ~printer:errpptree
(Ok (N (L 1, N (L 2, L 3)))) \
(parse_string ptree "(1 (2 3))" )
(Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \
(parse_string ptree "((1 2) (3 (4 5)))" )
(Ok (N (L 1, N (L 2, L 3)))) \
(parse_string ptree' "(1 (2 3))" )
(Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \
(parse_string ptree' "((1 2) (3 (4 5)))" )
*)(*$R
let p = U.list ~sep:"," U.word in
let printer = function
| Ok l -> "Ok " ^ CCFormat.(to_string (Dump.list string_quoted)) l
| Error s -> "Error " ^ s
in
assert_equal ~printer
(Ok ["abc"; "de"; "hello"; "world"])
(parse_string p "[abc , de, hello ,world ]");
*)(*$R
let test n =
let p = CCParse.(U.list ~sep:"," U.int) in
let l = CCList.(1 -- n) in
let l_printed =
CCFormat.(to_string (within "[" "]" (list ~sep:(return ",") int))) l in
let l' = CCParse.parse_string_exn p l_printed in
assert_equal ~printer:Q.Print.(list int) l l'
in
test 300_000;
*)(*$R
let open CCParse.Infix in
let module P = CCParse in
let parens p = P.char '(' *> p <* P.char ')' in
let add = P.char '+' *> P.return (+) in
let sub = P.char '-' *> P.return (-) in
let mul = P.char '*' *> P.return ( * ) in
let div = P.char '/' *> P.return ( / ) in
let integer =
P.chars1_if (function '0'..'9'->true|_->false) >|= int_of_string in
let chainr1 e op =
P.fix (fun r ->
e >>= fun x -> (op <*> P.return x <*> r) <|> P.return x) in
let expr : int P.t =
P.fix (fun expr ->
let factor = parens expr <|> integer in
let term = chainr1 factor (mul <|> div) in
chainr1 term (add <|> sub)) in
assert_equal (Ok 6) (P.parse_string expr "4*1+2");
assert_equal (Ok 12) (P.parse_string expr "4*(1+2)");
()
*)moduleMemo_tbl=Hashtbl.Make(structtypet=int*int(* id of parser, position *)letequal((a,b):t)(c,d)=a=c&&b=dlethash=Hashtbl.hashend)moduleMemo_state=struct(* table of closures, used to implement universal type *)typet=(unit->unit)Memo_tbl.t(* unique ID for each parser *)letid_=ref0end(* state common to all parser instances *)typecommon_state={str:string;mutableline_offsets:intarrayoption;mutablememo:Memo_state.toption;}typeposition={pos_cs:common_state;pos_offset:int;mutablepos_lc:(int*int)option;}modulePosition=structtypet=positionletcompute_line_offsets_(s:string):intarray=letlines=CCVector.create()inleti=ref0inCCVector.pushlines0;while!i<String.lengthsdomatchString.index_froms!i'\n'with|exceptionNot_found->i:=String.lengths|j->CCVector.pushlinesj;i:=j+1done;CCVector.to_arraylines;;letline_offsets_cs=matchcs.line_offsetswith|Somelines->lines|None->letlines=compute_line_offsets_cs.strincs.line_offsets<-Somelines;linesletint_cmp_:int->int->int=compare(* TODO: use pos_cs.line_offsets *)(* actually re-compute line and column from the buffer *)letcompute_line_and_col_(cs:common_state)(off:int):int*int=letoffsets=line_offsets_csinassert(offsets.(0)=0);beginmatchCCArray.bsearch~cmp:int_cmp_offoffsetswith|`At0->0,0|`Atn->(n-1),off-offsets.(n-1)-1|`Just_aftern->n,off-offsets.(n)|`Empty->assertfalse|`All_bigger->assertfalse(* off >= 0, and offsets[0] == 0 *)|`All_lower->letn=Array.lengthoffsets-1inn,off-offsets.(n)endletline_and_columnself=matchself.pos_lcwith|Sometup->tup|None->lettup=compute_line_and_col_self.pos_csself.pos_offsetinself.pos_lc<-Sometup;(* save *)tupletlineself=fst(line_and_columnself)letcolumnself=snd(line_and_columnself)letppoutself=letl,c=line_and_columnselfinFormat.fprintfout"at line %d, column %d"lcendmoduleError=structtypet={msg:unit->string;pos:position;}letpositionself=self.posletline_and_columnself=Position.line_and_columnself.posletmsgself=self.msg()letto_stringself=letline,col=line_and_columnselfinPrintf.sprintf"at line %d, char %d: %s"linecol(self.msg())letppoutself=letline,col=line_and_columnselfinFormat.fprintfout"@[<hv>at line %d, char %d:@ %s@]"linecol(self.msg())endtype+'aor_error=('a,Error.t)result(** Purely functional state passed around *)typestate={cs:common_state;i:int;(* offset in [str] *)j:int;(* end pointer in [str], excluded. [len = j-i] *)}(* FIXME: replace memo with:
[global : global_st ref]
where:
[type global = {
mutable memo: Memo_state.t option;
line_offsets: int CCVector.vector;
}
with line_offsets used to cache the offset where each line begins,
and is computed lazily, to make {!Position.line_and_column}
faster if called many times.
*)let[@inline]char_equal(a:char)b=Stdlib.(=)abletstring_equal=String.equal(* FIXME: printer for error
let () = Printexc.register_printer
(function
| ParseError (b,msg) ->
Some (Format.sprintf "@[<v>%s@ %s@]" (msg()) (string_of_branch b))
| _ -> None)
*)let[@inline]const_str_x():string=xletstate_of_stringstr=lets={cs={str;memo=None;line_offsets=None};i=0;j=String.lengthstr;}inslet[@inline]is_donest=st.i>=st.jlet[@inline]curst=st.cs.str.[st.i]letpos_of_st_st:position={pos_cs=st.cs;pos_offset=st.i;pos_lc=None}letmk_error_stmsg:Error.t={Error.msg;pos=pos_of_st_st}(* consume one char, passing it to [ok]. *)letconsume_st~ok~err=ifis_donestthen(letmsg=const_str_"unexpected end of input"inerr(mk_error_stmsg))else(letc=st.cs.str.[st.i]inok{stwithi=st.i+1}c)type'at={run:'b.state->ok:(state->'a->'b)->err:(Error.t->'b)->'b;}[@@unboxed](** Takes the input and two continuations:
{ul
{- [ok] to call with the result and new state when it's done}
{- [err] to call when the parser met an error}
}
*)letreturnx:_t={run=funst~ok~err:_->okstx}letpure=returnletmapf(p:'at):_t={run=funst~ok~err->p.runst~ok:(funstx->okst(fx))~err}letbindf(p:'at):_t={run=funst~ok~err->p.runst~ok:(funstx->letp2=fxinp2.runst~ok~err)~err}letap(f:_t)(a:_t):_t={run=funst~ok~err->f.runst~ok:(funstf->a.runst~ok:(funstx->okst(fx))~err)~err}letap_left(a:_t)(b:_t):_t={run=funst~ok~err->a.runst~ok:(funstx->b.runst~ok:(funst_->okstx)~err)~err}letap_right(a:_t)(b:_t):_t={run=funst~ok~err->a.runst~ok:(funst_->b.runst~ok:(funstx->okstx)~err)~err}letor_(p1:'at)(p2:'at):_t={run=funst~ok~err->p1.runst~ok~err:(fun_e->p2.runst~ok~err)}letbothab={run=funst~ok~err->a.runst~ok:(funstxa->b.runst~ok:(funstxb->okst(xa,xb))~err)~err}letset_error_messagemsg(p:'at):_t={run=funst~ok~err->p.runst~ok~err:(fun_e->err(mk_error_st(const_str_msg)))}moduleInfix=structlet[@inline](>|=)pf=mapfplet[@inline](>>=)pf=bindfplet(<*>)=aplet(<*)=ap_leftlet(*>)=ap_rightlet(<|>)=or_let(|||)=bothlet[@inline](<?>)pmsg=set_error_messagemsgp[@@@ifge4.8]let(let+)=(>|=)let(let*)=(>>=)let(and+)=bothlet(and*)=(and+)[@@@endif]endincludeInfixletmap2fxy=puref<*>x<*>yletmap3fxyz=puref<*>x<*>y<*>zletjunk_(st:state):state=assert (st.i<st.j);{stwithi=st.i+1}leteoi={run=funst~ok~err ->ifis_donestthenokst()elseerr (mk_error_st(const_str_"expected end of input"))}(*$= & ~printer:(errpp Q.Print.bool) ~cmp:(erreq (=))
(Ok true) (parse_string (U.bool <* eoi) "true")
(Error "") (parse_string (U.bool <* eoi) "true ")
(Ok true) (parse_string (U.bool <* skip_white <* eoi) "true")
*)letwith_posp:_t={run=funst~ok~err ->p.runst~ok:(funst'x->okst'(x,pos_of_st_st))~err}letpos:_t={run=funst~ok~err:_->okst(pos_of_st_st)}(*$= & ~printer:Q.Print.(pair int int)
(0,5) (let p = any_char_n 5 *> pos in \
match parse_string p "abcde " with \
| Ok p -> Position.line_and_column p \
| Error _ -> assert false)
*)(*$= & ~printer:Q.Print.(list @@ pair int int)
[(0,2); (1,3); (2,1); (3,0); (4,0); (5,2)] \
(let p = each_line (skip_space *> pos) in \
match parse_string p " a\n b\nc\n\n\n a" with \
| Ok ps -> List.map Position.line_and_column ps \
| Error _ -> assert false)
*)(* a slice is just a state, which makes {!recurse} quite easy. *)typeslice=statemoduleSlice=structtypet=sliceletlengthsl=sl.j-sl.iletis_empty sl=sl.i=sl.jletto_stringsl=String.subsl.cs.strsl.i(lengthsl)endletrecurse slicep:_t={run=fun_st~ok~err ->(* make sure these states are related. all slices share the
same reference as the initial state they derive from. *)assertCCShims_.Stdlib.(_st.cs==slice.cs);p.run slice~ok~err}letall={run=funst~ok~err:_->ifis_donestthenokststelse(let st_done={stwithi=st.j}inokst_donest)}letall_str=all >|=Slice.to_string(*$= & ~printer:(errpp Q.Print.string) ~cmp:(erreq (=))
(Ok "abcd") (parse_string all_str "abcd")
(Ok "cd") (parse_string (string "ab" *> all_str) "abcd")
(Ok "") (parse_string (string "ab" *> all_str) "ab")
*)(*$= & ~printer:(errpp Q.Print.(pair string string)) ~cmp:(erreq (=))
(Ok ("foobar", "")) (parse_string (both all_str all_str) "foobar")
*)letfailmsg:_t={run=funst~ok:_~err->err(mk_error_st(const_str_msg))}letfailfmsg=Printf.ksprintffailmsgletfail_lazymsg ={run=funst~ok:_~err->err(mk_error_stmsg)}let parsingwhatp={run=funst~ok~err->p.runst~ok~err:(fune->letmsg()=Printf.sprintf"while parsing %s:\n%s"what(e.Error.msg())inerr{ewithError.msg})}letempty={run=funst~ok~err:_->okst();}letnop=emptylet any_char={run=funst~ok~err ->consume_st~ok~err}letcharc:_t={run=funst~ok~err ->consume_st~ok:(funstc2->ifchar_equalcc2thenokstcelse (letmsg()=Printf.sprintf"expected '%c', got '%c'"cc2inerr(mk_error_stmsg)))~err}letchar_if?descrp={run=funst~ok~err->consume_st~ok:(funstc->ifpcthenokstcelse(letmsg()=letrest =matchdescrwith|None ->""|Somed->Printf.sprintf", expected %s"dinPrintf.sprintf"unexpected char '%c'%s"crestinerr(mk_error_stmsg)))~err}lettake_ifp:slicet={run=funst~ok~err:_->leti=refst.iinwhileletst={stwithi=!i}innot(is_donest)&&p(curst)doincri;done;ok{stwithi=!i}{stwithj=!i}}lettake1_if?descrp=take_ifp>>=funsl->ifSlice.is_emptysl then(letmsg()=letwhat=matchdescrwith|None->""|Somed->Printf.sprintf" for%s"dinPrintf.sprintf"expected non-empty sequence of chars%s"whatinfail_lazymsg)else(returnsl)letchars_ifp=take_ifp>|=Slice.to_stringletchars1_if?descrp={run=funst ~ok~err->(chars_ifp).runst~ok:(funsts->ifstring_equals""then(letmsg()=letwhat =matchdescrwith|None ->""|Somed->Printf.sprintf" for%s"dinPrintf.sprintf"expected non-empty sequence of chars%s"whatinerr(mk_error_stmsg))elseoksts)~err}(*$QR Q.(printable_string) (fun s ->
let pred = (function 'a'..'z' | 'A' .. 'Z' | '{' | '}' -> true | _ -> false) in
let p1 = chars1_if pred in
let p2 = take1_if pred >|= Slice.to_string in
parse_string p1 s = parse_string p2 s)
*)(*$T
let pred = (function 'a'..'z' | 'A' .. 'Z' | '{' | '}' -> true | _ -> false) in \
parse_string (chars_if pred) "coucou{lol} 123" = Ok "coucou{lol}"
*)exceptionFold_failofstate*stringletchars_fold~facc0={run=funst~ok~err ->leti0=st.iinleti=refi0inletacc=refacc0inletcontinue=ref trueintrywhile !continuedoletst={stwithi=!i}inifis_donestthen(continue:=false;)else(letc=curstinmatchf!acccwith|`Continue acc'->incri;acc:=acc'|`Stopa->acc:=a;continue:=false;|`Consume_and_stopa->acc:=a;incri;continue:=false|`Failmsg->raise(Fold_fail(st,msg)))done;ok{stwithi=!i}(!acc,{stwith j=!i})with Fold_fail(st,msg)->err(mk_error_st(const_str_ msg))}letchars_fold_transduce~facc0={run=funst~ok~err ->leti0=st.iinleti=refi0inletacc=refacc0inletcontinue=ref trueinletbuf =Buffer.create16intrywhile!continuedoletst={stwithi=!i}inifis_donestthen(continue:=false;)else(letc=curstinmatchf!acccwith|`Continue acc'->incri;acc:=acc'|`Yield(acc',c')->incri;acc:=acc';Buffer.add_char bufc';|`Stop ->continue:=false;|`Consume_and_stop->incri;continue:=false|`Failmsg->raise(Fold_fail(st,msg)))done;ok{stwithi=!i}(!acc,Buffer.contentsbuf)withFold_fail(st,msg)->err(mk_error_st(const_str_ msg))}letskip_charsp:_t=letrecself={run=funst~ok~err->ifnot(is_donest)&&p(curst)then(letst=junk_stinself.runst~ok~err)elseokst()}inselflet is_alpha=function|'a'..'z'|'A'..'Z'->true|_->falseletis_num=function'0'..'9'->true|_->falseletis_alpha_num=function|'a'..'z' |'A'..'Z'|'0'..'9'->true|_->falseletis_space=function' '|'\t'->true|_->falseletis_white=function' '|'\t'|'\n'->true|_->falseletspace=char_ifis_spaceletwhite =char_ifis_whiteletendline=char_if~descr:"end-of-line ('\\n')"(function'\n'->true|_->false)letskip_space=skip_charsis_spaceletskip_white=skip_charsis_whitelettry_orp1~f~else_:p2={run=funst~ok~err->p1.runst~ok:(funstx->(fx).runst~ok~err)~err:(fun_->p2.runst~ok~err)}lettry_or_l?(msg="try_or_l ran out of options")?else_l:_t={run=funst~ok~err ->letrecloop=function|(test,p)::tl->test.runst~ok:(fun__-> p.runst~ok~err)(* commit *)~err:(fun_->looptl)|[]->begin matchelse_with|None->err(mk_error_st(const_str_msg))|Somep->p.runst~ok~errendinloop l}letsuspendf={run=funst~ok~err->letp=f()inp.runst~ok~err}(* read [len] chars at once *)lettakelen:slicet={run=funst~ok~err ->ifst.i+len<=st.jthen(letslice={stwithj=st.i+len}inletst={stwithi=st.i+len}inokstslice)else(letmsg()=Printf.sprintf"expected to be able to consume %d chars"leninerr(mk_error_stmsg))}letany_char_nlen:_t=takelen>|=Slice.to_stringletexacts={run=funst~ok~err->(* parse a string of length [String.length s] and compare with [s] *)(any_char_n(String.lengths)).runst~ok:(funsts2->ifstring_equalss2thenokstselse (letmsg()=Printf.sprintf"expected %S, got %S"ss2inerr(mk_error_stmsg)))~err}letstring=exactlet fixf=let recself={run=funst~ok~err->(Lazy.forcef_self).runst~ok~err}and f_self=lazy(fself)inselflettry_p=plettry_opt p:_t={run=funst~ok~err:_->p.runst~ok:(funstx->ok st(Somex))~err:(fun_->okstNone)}letoptionalp:_t={run=funst~ok~err:_->p.runst~ok:(funst_x->okst())~err:(fun_->okst())}letmany_until~until p:_t=fix(funself->try_oruntil~f:(fun_->pure[])~else_:(p>>=funx->self>|=funl->x::l))letmanyp:_t=fix(funself->try_orp~f:(funx->self>|=funtl->x::tl)~else_:(pure[]))(*
(* parse many [p], as a difference list *)
let many_rec_ p : (_ list -> _ list) t =
let rec self = {
run=fun st ~ok ~err ->
if is_done st then ok st (fun l->l) (* empty list *)
else (
p.run st
~ok:(fun st x ->
self.run st
~ok:(fun st f -> ok st (fun l -> x :: f l))
~err)
~err
)
} in
self
let many p : _ t = {
run=fun st ~ok ~err ->
(many_rec_ p).run st
~ok:(fun st f -> ok st (f []))
~err
}
*)(*$R
let p0 = skip_white *> U.int in
let p = (skip_white *> char '(' *> many p0) <* (skip_white <* char ')') in
let printer = CCFormat.(to_string @@ Dump.result @@ Dump.list int) in
assert_equal ~printer
(Ok [1;2;3]) (parse_string p "(1 2 3)");
assert_equal ~printer
(Ok [1;2; -30; 4]) (parse_string p "( 1 2 -30 4 )")
*)letmany1p=p>>=funx->manyp>|=funl->x::l(*skip can be made efficient by not allocating intermediate parsers *)letskipp:_t=letrecself={run=funst~ok~err->p.runst~ok:(funst_->self.runst~ok~err)~err:(fun_->okst())}inselfletsep_until ~until~byp=letrecread_p=lazy (p>>=funx->(until*>pure[x])<|>(by*>(Lazy.forceread_p>|=funtl->x::tl)))in(until*>pure[])<|>(Lazy.forceread_p)letsep~byp=letrecread_p=lazy (try_orp~f:(funx->(eoi*>pure[x])<|>try_orby~f:(fun_->Lazy.forceread_p>|=funtl->x::tl)~else_:(pure[x]))~else_:(pure[]))inLazy.forceread_p(*$inject
let aword = chars1_if (function 'a'..'z'|'A'..'Z'->true|_ -> false);;
*)(*$= & ~printer:(errpp Q.Print.(list string))
(Ok ["a";"b";"c"]) \
(parse_string (optional (char '/') *> sep ~by:(char '/') aword) "/a/b/c")
(Ok ["a";"b";"c"]) \
(parse_string (optional (char '/') *> sep ~by:(char '/') aword) "a/b/c")
*)letsep1~byp=p>>=funx->sep~byp>|=funtl->x::tlletlookaheadp:_t={run=funst~ok~err ->p.runst~ok:(fun_stx->okstx)(* discardp's newstate *)~err}letlookahead_ignorep:_t={run=funst~ok~err ->p.runst~ok:(fun_st_x->okst())~err}letset_current_slicesl:_t={run=fun_st ~ok~err:_->assertCCShims_.Stdlib.(_st.cs==sl.cs);oksl()(* jump to slice *)}(*$= & ~printer:(errpp Q.Print.(string))
(Ok "abc") (parse_string (lookahead (string "ab") *> (string "abc")) "abcd")
*)(*$= & ~printer:(errpp Q.Print.(string))
(Ok "1234") (parse_string line_str "1234\nyolo")
*)(*$= & ~printer:(errpp Q.Print.(pair String.escaped String.escaped))
(Ok ("1234", "yolo")) (parse_string (line_str ||| line_str) "1234\nyolo\nswag")
*)letsplit_1~on_char:_t={run=funst~ok~err:_->ifst.i>=st.jthen(ok st(st,None))else(matchString.index_fromst.cs.strst.ion_charwith|j->letx={stwithj}inlety={stwithi=minst.j(j+1)}inlet st_done={stwithi=st.j}in(* empty *)okst_done(x,Somey)|exception Not_found->letst_done={stwithi=st.j}in(* empty *)okst_done(st,None))}letsplit_list_at_most~on_charn:slicelistt=letrecloopaccn=ifn<=0then ((* addthe rest to [acc] *)all>|=funrest->letacc=rest::accinList.revacc)else(try_oreoi~f:(fun_->return(List.revacc))~else_:(parse_1accn))andparse_1accn=split_1~on_char>>=fun(sl1,rest)->letacc=sl1::accinmatchrestwith|None->return (List.revacc)|Somerest->recurse rest(loopacc(n-1))inloop[]n(*$= & ~printer:(errppQ.Print.(list string)) ~cmp:(erreq (=))
(Ok ["a";"b";"c";"d,e,f"]) \
(parse_string (split_list_at_most ~on_char:',' 3 >|= List.map Slice.to_string) "a,b,c,d,e,f")
(Ok ["a";"bc"]) \
(parse_string (split_list_at_most ~on_char:',' 3 >|= List.map Slice.to_string) "a,bc")
*)letsplit_list~on_char:_t=split_list_at_most ~on_charmax_intletsplit_2~on_char:_t=split_list_at_most ~on_char3>>=function|[a;b]->return(a,b)|_->fail"split_2: expected 2 fields exactly"letsplit_3~on_char:_t=split_list_at_most ~on_char4>>=function|[a;b;c]->return(a,b,c)|_->fail"split_3: expected 3 fields exactly"letsplit_4~on_char:_t=split_list_at_most ~on_char5>>=function|[a;b;c;d]->return(a,b,c,d)|_->fail"split_4: expected 4 fields exactly"letsplit_list~on_char:slicelistt=letrec loopacc=try_oreoi~f:(fun_->return(List.revacc))~else_:(parse_1acc)andparse_1acc=split_1~on_char >>= fun(sl1,rest)->letacc=sl1::accinmatchrestwith|None->return (List.revacc)|Somerest->recurse rest(loopacc)inloop[]leteach_split~on_charp:'alistt=letrecloopacc=split_1~on_char>>=fun(sl1,rest)->(* parse [sl1] with [p] *)recursesl1p>>=funx->letacc=x::accinmatchrestwith|None->return (List.revacc)|Somerest->recurse rest(loopacc)inloop[]letline:slicet=split_1~on_char:'\n'>>=fun(sl,rest)->matchrestwith|None->returnsl|Somerest->set_current_slicerest>|=fun()->slletline_str=line>|=Slice.to_stringleteach_linep:_t=each_split~on_char:'\n'p(*$= & ~printer:(errppQ.Print.(list @@ list int))
(Ok ([[1;1];[2;2];[3;3];[]])) \
(parse_string (each_line (sep ~by:skip_space U.int)) "1 1\n2 2\n3 3\n")
*)letmemo(typea)(p:at):at=letid=!Memo_state.id_inincrMemo_state.id_;letr=refNonein(* used for universal encoding *){run=funst~ok~err->lettbl=matchst.cs.memowith|Somet->t|None->lettbl=Memo_tbl.create32inst.cs.memo<-Sometbl;tblinmatchr:=None;letf=Memo_tbl.findtbl(st.i,id)inf();!rwith|None->assertfalse|Some(Ok(st,x))->okstx|Some (Errore)->erre|exceptionNot_found->(* parse, and save *)p.runst~ok:(funst'x->Memo_tbl.replacetbl(st.i,id)(fun()->r:= Some(Ok (st',x)));okst'x)~err:(fun e->Memo_tbl.replacetbl(st.i,id)(fun()->r:= Some(Error e));erre)}letfix_memof=letrecp={run=funst~ok~err->(Lazy.forcep').runst~ok~err}and p'=lazy(memo(fp))inpexceptionParseErrorofError.tletstringify_result=function|Ok_asx->x|Errore->Error(Error.to_stringe)letparse_string_exnps=p.run(state_of_string s)~ok:(fun_stx->x)~err:(fune->raise(ParseErrore))letparse_string_eps=p.run(state_of_string s)~ok:(fun_stx->Okx)~err:(fune->Errore)letparse_stringps=parse_string_eps|>stringify_resultletread_all_ic=letbuf=Buffer.create1024inbegintrywhiletruedoletline=input_lineicinBuffer.add_string bufline;Buffer.add_charbuf'\n';done;assertfalsewithEnd_of_file->()end;Buffer.contentsbufletparse_file_epfile=letic=open_infile inlets=read_all_ ic inletr=parse_string_epsinclose_inic;rletparse_filepfile =parse_file_epfile|>stringify_resultletparse_file_exnpfile=matchparse_file_e pfilewith|Okx->x|Errore->raise(ParseErrore)moduleU=structletsep_=sepletlist?(start="[")?(stop="]")?(sep=";")p=stringstart*>skip_white*>sep_until~until:(skip_white<*stringstop)~by:(skip_white*>stringsep*>skip_white)pletint=skip_white*>chars1_if~descr:"integer"(func->is_numc||char_equalc'-')>>=funs->tryreturn(int_of_strings)withFailure_->fail"expected an int"(*$= & ~printer:(errpp Q.Print.int) ~cmp:(erreq (=))
(Ok 42) (parse_string U.int " 42")
(Ok 2) (parse_string U.int "2")
(Error "") (parse_string U.int "abc")
(Error "") (parse_string U.int "")
*)letin_paren(p:'at):'at=skip_white *>(char'('*>skip_white*>p<*skip_white<*char')')letin_parens_opt(p:'at):'at=fix (funself->skip_white*>try_or(char'(')~f:(fun_->skip_white*>self<*skip_white<*char')')~else_:p)(*$= & ~printer:(errpp Q.Print.int) ~cmp:(erreq (=))
(Ok 15) (parse_string (U.in_paren (U.in_paren U.int)) "( ( 15) )")
(Ok 2) (parse_string (U.in_paren U.int) "(2)")
(Error "") (parse_string (U.in_paren U.int) "2")
(Error "") (parse_string (U.in_paren U.int) "")
(Ok 2) (parse_string (U.in_parens_opt U.int) "((((2))))")
(Ok 2) (parse_string (U.in_parens_opt U.int) "2")
(Ok 200) (parse_string (U.in_parens_opt U.int) "( ( 200 ) )")
*)letoptionp=skip_white*>try_or(string"Some")~f:(fun_->skip_white*>p>|=funx->Somex)~else_:(string"None"*>returnNone)(*$= & ~printer:(errpp Q.Print.(option int)) ~cmp:(erreq (=))
(Ok (Some 12)) (parse_string U.(option int) " Some 12")
(Ok None) (parse_string U.(option int) " None")
(Ok (Some 0)) (parse_string U.(option int) "Some 0")
(Ok (Some 0)) (parse_string U.(in_parens_opt @@ option int) "(( Some 0) )")
*)lethexa_int=(exact"0x"<|>return"")*>beginchars1_if(function'0'..'9'|'a'..'f'|'A'..'F'->true|_->false)>|=funs->leti=ref0inString.iter(func->letn=matchcwith|'0'..'9'->Char.codec-Char.code'0'|'a'.. 'f'->Char.codec-Char.code'a'+10|'A'..'F'->Char.codec-Char.code'A'+10|_->assertfalseini:=!i*16+n)s;!iend(*$= & ~printer:(errpp Q.Print.int) ~cmp:(erreq (=))
(Ok 16) (parse_string U.hexa_int "0x10")
(Ok 16) (parse_string U.hexa_int "10")
(Error "") (parse_string U.hexa_int "x10")
(Error "") (parse_string U.hexa_int "0xz")
*)letprepend_strcs=String.make1c^sletword=map2prepend_str(char_ifis_alpha)(chars_ifis_alpha_num)letbool=skip_white*>((string"true"*>returntrue)<|>(string"false"*>returnfalse))(*$= & ~printer:(errpp Q.Print.bool) ~cmp:(erreq (=))
(Ok true) (parse_string U.bool "true")
(Ok false) (parse_string U.bool "false")
*)letpair?(start="(")?(stop=")")?(sep=",")p1p2=skip_white*>string start*>skip_white*>p1 >>=funx1->skip_white*>stringsep*>skip_white*>p2 >>=funx2->skip_white*>stringstop*>return(x1,x2)(*$= & ~printer:Q.Print.(errpp (pair int int))
(Ok(1,2)) U.(parse_string (pair int int) "(1 , 2 )")
*)lettriple?(start="(")?(stop=")")?(sep=",")p1p2p3=stringstart*>skip_white*>p1 >>=funx1->skip_white*>stringsep*>skip_white*>p2 >>=funx2->skip_white*>stringsep*>skip_white*>p3 >>=funx3->stringstop*>return(x1,x2,x3)endmoduleDebug_=structlettrace_failnamep={run=funst~ok~err->p.runst~ok~err:(fune->Printf.eprintf"trace %s: fail with %s\n%!"name(Error.to_stringe);erre)}lettrace_~bothname~printp={run=funst~ok~err->p.runst~ok:(funstx->Printf.eprintf"trace %s: parsed %s\n%!"name(printx);ok stx)~err:(fune->ifboththen(Printf.eprintf"trace %s: fail with %s\n%!"name(Error.to_stringe););erre)}lettrace_successname~printp=trace_~both:falsename~printplettrace_success_or_failname~printp=trace_~both:truename~printpend