(******************************************************************************)
(* OASIS: architecture for building OCaml libraries and applications          *)
(*                                                                            *)
(* Copyright (C) 2011-2016, Sylvain Le Gall                                   *)
(* Copyright (C) 2008-2011, OCamlCore SARL                                    *)
(*                                                                            *)
(* This library is free software; you can redistribute it and/or modify it    *)
(* under the terms of the GNU Lesser General Public License as published by   *)
(* the Free Software Foundation; either version 2.1 of the License, or (at    *)
(* your option) any later version, with the OCaml static compilation          *)
(* exception.                                                                 *)
(*                                                                            *)
(* This library is distributed in the hope that it will be useful, but        *)
(* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *)
(* or FITNESS FOR A PARTICULAR PURPOSE. See the file COPYING for more         *)
(* details.                                                                   *)
(*                                                                            *)
(* You should have received a copy of the GNU Lesser General Public License   *)
(* along with this library; if not, write to the Free Software Foundation,    *)
(* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA               *)
(******************************************************************************)

(** Various string utilities.

    Mostly inspired by extlib and batteries ExtString and BatString libraries.

    @author Sylvain Le Gall
*)


(** [nsplitf str f] splits [str] at every character for which [f] returns
    [true]. Separator characters are not included in the result. Empty
    fields are kept (e.g. a trailing separator yields a trailing [""]), but
    the empty string yields the empty list.
*)
let nsplitf str f =
  if str = "" then
    []
  else
    let buf = Buffer.create 13 in
    let lst = ref [] in
    (* Move the current field from the buffer to the (reversed) result. *)
    let push () =
      lst := Buffer.contents buf :: !lst;
      Buffer.clear buf
    in
    let str_len = String.length str in
    for i = 0 to str_len - 1 do
      if f str.[i] then
        push ()
      else
        Buffer.add_char buf str.[i]
    done;
    (* Flush the last field, which is not followed by a separator. *)
    push ();
    List.rev !lst


(** [nsplit s c] Split the string [s] at char [c]. It doesn't include the
    separator.
*)
let nsplit str c =
  nsplitf str ((=) c)


(** [find ~what ?offset str] returns the index of the first occurrence of
    [what] in [str] that starts at or after [offset] (default [0]).
    If [what] is empty, [offset] is returned unchanged.

    @raise Not_found if [what] does not occur in [str] from [offset] on.
*)
let find ~what ?(offset=0) str =
  let what_len = String.length what in
  if what_len = 0 then
    (* An empty needle trivially matches where the search starts. *)
    offset
  else
    begin
      (* Last index at which a full copy of [what] still fits in [str]. *)
      let last_start = String.length str - what_len in
      let rec matches_at start i =
        i >= what_len
        || (str.[start + i] = what.[i] && matches_at start (i + 1))
      in
      (* Try every start position in turn: restarting the comparison without
         going back to the next candidate position would miss occurrences
         that overlap a partial match (e.g. "ab" in "aab"). *)
      let rec scan start =
        if start > last_start then
          raise Not_found
        else if matches_at start 0 then
          start
        else
          scan (start + 1)
      in
      scan offset
    end


(** [sub_start str len] returns [str] without its first [len] characters, or
    [""] if [len] is greater than or equal to the length of [str].
*)
let sub_start str len =
  let str_len = String.length str in
  if len >= str_len then
    ""
  else
    String.sub str len (str_len - len)


(** [sub_end str len] returns [str] without its last [len] characters, or
    [""] if [len] is greater than or equal to the length of [str].
    The optional [offset] argument is accepted but not used.
*)
let sub_end ?(offset=0) str len =
  let str_len = String.length str in
  if len >= str_len then
    ""
  else
    String.sub str 0 (str_len - len)


(** [starts_with ~what ?offset str] tests whether [what] occurs in [str]
    exactly at position [offset] (default [0]).
*)
let starts_with ~what ?(offset=0) str =
  let what_idx = ref 0 in
  let str_idx = ref offset in
  let ok = ref true in
  while !ok &&
        !str_idx < String.length str &&
        !what_idx < String.length what do
    if str.[!str_idx] = what.[!what_idx] then
      incr what_idx
    else
      ok := false;
    incr str_idx
  done;
  (* Success only if every character of [what] has been matched. *)
  !what_idx = String.length what


(** [strip_starts_with ~what str] removes the prefix [what] from [str].

    @raise Not_found if [str] does not start with [what].
*)
let strip_starts_with ~what str =
  if starts_with ~what str then
    sub_start str (String.length what)
  else
    raise Not_found


(** [ends_with ~what ?offset str] tests whether [str] ends with [what],
    comparing backwards from the end of both strings and never looking at
    characters of [str] located before [offset] (default [0]).
*)
let ends_with ~what ?(offset=0) str =
  let what_idx = ref ((String.length what) - 1) in
  let str_idx = ref ((String.length str) - 1) in
  let ok = ref true in
  while !ok &&
        offset <= !str_idx &&
        0 <= !what_idx do
    if str.[!str_idx] = what.[!what_idx] then
      decr what_idx
    else
      ok := false;
    decr str_idx
  done;
  (* Success only if every character of [what] has been matched. *)
  !what_idx = -1


(** [strip_ends_with ~what str] removes the suffix [what] from [str].

    @raise Not_found if [str] does not end with [what].
*)
let strip_ends_with ~what str =
  if ends_with ~what str then
    sub_end str (String.length what)
  else
    raise Not_found


(** [replace_chars f s] returns a new string in which each character [c] of
    [s] is replaced by [f c].
*)
let replace_chars f s =
  let buf = Buffer.create (String.length s) in
  String.iter (fun c -> Buffer.add_char buf (f c)) s;
  Buffer.contents buf


(** Convert the characters ['A'..'Z'] to lowercase, leaving every other
    character untouched.
*)
let lowercase_ascii =
  replace_chars
    (fun c ->
       if (c >= 'A' && c <= 'Z') then
         Char.chr (Char.code c + 32)
       else
         c)


(** Convert the first character of the string to lowercase, using
    [lowercase_ascii]. The empty string is returned unchanged.
*)
let uncapitalize_ascii s =
  if s <> "" then
    (lowercase_ascii (String.sub s 0 1)) ^
    (String.sub s 1 ((String.length s) - 1))
  else
    s


(** Convert the characters ['a'..'z'] to uppercase, leaving every other
    character untouched.
*)
let uppercase_ascii =
  replace_chars
    (fun c ->
       if (c >= 'a' && c <= 'z') then
         Char.chr (Char.code c - 32)
       else
         c)


(** Convert the first character of the string to uppercase, using
    [uppercase_ascii]. The empty string is returned unchanged.
*)
let capitalize_ascii s =
  if s <> "" then
    (uppercase_ascii (String.sub s 0 1)) ^
    (String.sub s 1 ((String.length s) - 1))
  else
    s


(* END EXPORT *)


(* TODO: replace lowercase_ascii, capitalize_ascii and uncapitalize_ascii
   functions by String.*_ascii function when OCaml minimal version will be
   4.03.0.
*)


(** Default whitespace predicate: space, carriage return, newline and tab. *)
let is_whitespace =
  function
  | ' ' | '\r' | '\n' | '\t' -> true
  | _ -> false


(** [tokenize ?is_whitespace ?tokens str] splits [str] into words.
    Characters satisfying [is_whitespace] separate words and are dropped.
    Each string of [tokens] found in [str] is returned as a word of its own,
    even when it is not surrounded by whitespace. Tokens are tried in list
    order and empty tokens are ignored.
*)
let tokenize ?(is_whitespace=is_whitespace) ?(tokens=[]) str =
  let lst = ref [] in
  let buf = Buffer.create 13 in
  let idx = ref 0 in
  let push () =
    (* Push the content of the buffer on the list. *)
    if Buffer.length buf > 0 then
      begin
        lst := Buffer.contents buf :: !lst;
        Buffer.clear buf
      end
  in
  let match_token () =
    List.exists
      (fun tok ->
         (* An empty token matches everywhere without consuming any input,
            which would prevent the main loop from ever advancing. *)
         if tok <> "" && starts_with ~what:tok ~offset:!idx str then
           begin
             push ();
             lst := tok :: !lst;
             idx := !idx + (String.length tok);
             true
           end
         else
           false)
      tokens
  in
  while !idx < String.length str do
    let c = str.[!idx] in
    if is_whitespace c then
      begin
        push ();
        incr idx
      end
    else if match_token () then
      begin
        (* [match_token] has already recorded the token and moved [idx]. *)
        ()
      end
    else
      begin
        Buffer.add_char buf c;
        incr idx
      end
  done;
  push ();
  List.rev !lst


(** [tokenize_genlex ?tokens str] runs the lexer built by
    [Genlex.make_lexer tokens] over [str] and returns the tokens it
    produces, in order.

    NOTE: the [Genlex] and [Stream] modules are no longer part of the
    standard library from OCaml 5.0 on.
*)
let tokenize_genlex ?(tokens=[]) str =
  let strm = Genlex.make_lexer tokens (Stream.of_string str) in
  let lst = ref [] in
  Stream.iter (fun tok -> lst := tok :: !lst) strm;
  List.rev !lst


(** [split str c] cuts [str] at the first occurrence of [c] and returns the
    text before and after it; the separator itself is dropped.

    @raise Not_found if [c] does not occur in [str].
*)
let split str c =
  let idx = String.index str c in
  String.sub str 0 idx,
  String.sub str (idx + 1) (String.length str - idx - 1)


(** [trim str] removes leading and trailing whitespace, as defined by
    [is_whitespace], from [str].
*)
let trim str =
  let start_non_blank = ref 0 in
  let stop_non_blank = ref ((String.length str) - 1) in
  while !start_non_blank < String.length str &&
        is_whitespace (str.[!start_non_blank]) do
    incr start_non_blank
  done;
  (* Stop when both ends meet, which happens for a blank-only string. *)
  while !start_non_blank <= !stop_non_blank &&
        is_whitespace (str.[!stop_non_blank]) do
    decr stop_non_blank
  done;
  String.sub str !start_non_blank (!stop_non_blank - !start_non_blank + 1)


(** [fold_left f acc str] folds [f] over the characters of [str], from first
    to last, starting with [acc].
*)
let fold_left f acc str =
  let racc = ref acc in
  for i = 0 to String.length str - 1 do
    racc := f !racc str.[i]
  done;
  !racc


(** [contains ~what str] tests whether [what] occurs somewhere in [str].
    The empty string is contained in every string.
*)
let contains ~what str =
  try
    ignore (find ~what str);
    true
  with Not_found ->
    false


(** Split a list using ',' as separator. {b Not exported} *)
let split_comma str =
  List.map trim (nsplit str ',')


(** Split a list using '\n' as separator. {b Not exported} *)
let split_newline ?(do_trim=true) str =
  let lst = nsplit str '\n' in
  if do_trim then
    List.map trim lst
  else
    lst


(** Split a string containing '(...)' optionally. Returns the trimmed text
    located before the first '(' and, when the string ends with ')', the
    trimmed text between the two. If there is no '(' or no final ')', the
    whole trimmed string is returned with [None].
    {b Not exported} *)
let split_optional_parentheses str =
  try
    let beg_str, end_str = split (trim str) '(' in
    let content_str = strip_ends_with ~what:")" end_str in
    trim beg_str, Some (trim content_str)
  with Not_found ->
    trim str, None