123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224openCore(* Given a function to get the next char of the input, returns a function to get the next
block of transformed input. Strings returned via `Ok always have nonzero length. *)letread_of_next_char:next_char:(unit->charoption)->(unit->[`Okofstring|`Eof])Staged.t=fun~next_char->(* These are string that could trigger comment-mode in the sexp lexer OR lead to parse
errors - we make sure they get quoted so that they get interpreted as atoms instead *)letshould_be_quotedc=matchcwith|';'|'|'|'#'|')'->true|_->falsein(* The transformation necessary to turn a raw atom that didn't appear with double quotes
into a string that will parse to the same character sequence once it does get
double-quoted *)letescape=unstage(String.Escaping.escape~escapeworthy:['"']~escape_char:'\\')inletmaybe_quote_not_inside_string_atoms=ifString.existss~f:should_be_quotedthen"\""^escapes^"\""elsesin(* These are characters that signal the end of the current atom when not inside a string *)letterminates_atomc~paren_depth=matchcwith|'('|'"'|' '|'\t'|'\012'|'\n'|'\r'->true|')'whenInt.(>)!paren_depth0->true|_->falsein(* State variables *)letparen_depth=ref0inletinside_string=reffalseinletfollows_escape_in_string=reffalseinletatom_so_far=Buffer.create32inletall_done=reffalsein(* Read from the in_channel and either return `Eof or return `Ok s where s is a chunk
of input, possibly zero-length *)letread()=if!all_donethen`Eofelse(matchnext_char()with|Somec->if(* Inside string *)!inside_stringthen((* If we followed an escape character, we always take the next char verbatim *)letfollowed_escape_in_string=!follows_escape_in_stringinfollows_escape_in_string:=false;iffollowed_escape_in_stringthen(Buffer.add_charatom_so_farc;`Ok""(* Else... *))else(matchcwith(* A quote terminates the string and we return it *)|'"'->Buffer.add_charatom_so_farc;lets=Buffer.contentsatom_so_farinBuffer.clearatom_so_far;inside_string:=false;`Oks(* Any other character gets added to the string, and if it's an escape
character, we remember this *)|c->ifChar.equalc'\\'thenfollows_escape_in_string:=true;Buffer.add_charatom_so_farc;`Ok""(* Not inside string *)))elseif(* Chars that don't terminate the atom just get appended and we continue *)not(terminates_atomc~paren_depth)then(Buffer.add_charatom_so_farc;`Ok""(* Else... *))else((* We have a naked atom that didn't appear as a string in the sexp - quote
if it needed *)letret=Buffer.contentsatom_so_farinBuffer.clearatom_so_far;letret=maybe_quote_not_inside_string_atomretin(* Then handle the character that terminated the atom *)matchcwith(* Parens change the depth and then get output *)|'('->incrparen_depth;`Ok(ret^String.of_charc)|')'->decrparen_depth;`Ok(ret^String.of_charc)(* Whitespace simply gets output *)|' '|'\t'|'\012'|'\n'|'\r'->`Ok(ret^String.of_charc)(* Quotes send us into string mode *)|'"'->inside_string:=true;Buffer.add_charatom_so_farc;`Okret|_->assertfalse)(* End of in-channel input *)|None->letret=(* If inside a string, then to prevent parse errors, finish up the string *)if!inside_stringthen((* If there was an escape char without anything after it, complete that too *)if!follows_escape_in_stringthenBuffer.add_charatom_so_far'\\';Buffer.add_charatom_so_far'"';letret=Buffer.contentsatom_so_farinBuffer.clearatom_so_far;ret(* Else if not inside a string, finish up any naked atom and quote as needed *))else(letret=Buffer.contentsatom_so_farinBuffer.clearatom_so_far;maybe_quote_not_inside_string_atomret)in(* Then add parens to get our paren depth back to 0 *)while!paren_depth>0doBuffer.add_charatom_so_far')';decrparen_depthdone;(* Yay! *)all_done:=true;`Ok(ret^Buffer.contentsatom_so_far))in(* Transform the step function so that it never returns Ok "" *)letrecread_until()=matchread()with|`Ok""->read_until()|`Oks->`Oks|`Eof->`Eofinstageread_until;;letlexbuf_of_channelchan=letnext_char()=In_channel.input_charchaninletread=unstage(read_of_next_char~next_char)in(* Tuple of string, chars used in string *)letleftover=ref("",0)in(* Read up to n chars into bytes, for lexer *)letlex_funbytesn=letresult=ifString.length(fst!leftover)-snd!leftover>0then(lets=!leftoverinleftover:="",0;`Oks)else(matchread()with|`Eof->`Eof|`Oks->`Ok(s,0))inmatchresultwith|`Eof->0|`Ok(s,used)->ifString.lengths-used>nthen(Bytes.From_string.blit~src_pos:used~dst_pos:0~src:s~dst:bytes~len:n;leftover:=s,used+n;n)else(Bytes.From_string.blit~src_pos:used~dst_pos:0~src:s~dst:bytes~len:(String.lengths-used);String.lengths-used)inLexing.from_functionlex_fun;;lettransform_strings=letpos=ref0inletnext_char()=if!pos>=String.lengthsthenNoneelse(letc=s.[!pos]inincrpos;Somec)inletread=unstage(read_of_next_char~next_char)inletbuf=Buffer.create(String.lengths)inletrecloop()=matchread()with|`Eof->Buffer.contentsbuf|`Oks->Buffer.add_stringbufs;loop()inloop();;openString.Replace_polymorphic_compareletunchangeds=transform_strings=slet%test_=unchanged""let%test_=unchanged"abc"let%test_=unchanged"()"let%test_=unchanged"bf((a)d((c\"eg\")))"let%test_=unchanged" d ( ef) \n (\r\t ) \\ \\m x \") \b\r (\""let%test_=unchanged"%!@&*^:'?/,.~`[}]{-+=_-"let%test_=unchanged"\"foo\\\"d\""let%test"completes unmatched parens"=transform_string"("="()"let%test"completes unmatched parens"=transform_string"(a)(b(()(c"="(a)(b(()(c)))"let%test"completes unmatched quotes"=transform_string"\""="\"\""let%test"completes unmatched quotes"=transform_string"\"\\\""="\"\\\"\""let%test"completes unmatched quotes"=transform_string"((\"ab"="((\"ab\"))"let%test"completes unmatched escape in string"=transform_string"\"\\"="\"\\\\\""let%test"stringifies extra close parens"=transform_string")"="\")\""let%test"stringifies extra close parens"=transform_string")(())))())"="\")\"(())\"))\"()\")\"";;let%test"turns sexp special chars to strings"=transform_string"#"="\"#\""let%test"turns sexp special chars to strings"=transform_string";"="\";\""let%test"turns sexp special chars to strings"=transform_string"|"="\"|\""let%test"turns sexp special chars to strings"=transform_string"## |#| (#a;) ;a\"bc\"|\n;#)|"="\"##\" \"|#|\" (\"#a;\") \";a\"\"bc\"\"|\"\n\";#)|\"";;