camomileLibrary.mlp"moduleConfigInt=ConfigIntmoduleDefaultConfig=CamomileDefaultConfig(** Individual modules *)moduleOOChannel=OOChannelmoduleUChar=UCharmoduleUSet=USetmoduleUMap=UMapmoduleUCharTbl=UCharTblmoduleUnicodeString=UnicodeStringmoduleUText=UTextmoduleXString=XStringmoduleSubText=SubTextmoduleULine=ULinemoduleLocale=LocalemoduleUTF8=UTF8moduleUTF16=UTF16moduleUCS4=UCS4moduleUPervasives=UPervasivesmoduleURe=URemoduleCharEncoding=CharEncodingmoduleUCharInfo=UCharInfomoduleUNF=UNFmoduleUCol=UColmoduleCaseMap=CaseMapmoduleUReStr=UReStrmoduleStringPrep=StringPrep(** All-in-one, configure once at beginning module*)moduletypeType=sigmoduleOOChannel:sig# 1 "Camomile/public/oOChannel.mli"# 1 "Camomile/public/oOChannel.mli"(** Object Oriented Channel *)(* Copyright (C) 2002, 2003, 2010 Yamagata Yoriyuki. *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . 
*)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. *)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)(** Generic input channel
Have the same interface of Polymorphic input channel of
http://www.ocaml-programming.de/rec/IO-Classes.html
All channels of Camomile having this interface must conform to
the behaviour defined in the recommendation above.
*)classtype['a]obj_input_channel=objectmethodclose_in:unit->unitmethodget:unit->'aend(** Generic output channel
Have the same interface of Polymorphic output channel of
http://www.ocaml-programming.de/rec/IO-Classes.html
All channels of Camomile having this interface must conform to
the behaviour defined in the recommendation above.
*)classtype['a]obj_output_channel=object(** If close_out cannot output all buffered objects, it raises
Failure *)methodclose_out:unit->unit(** If flush cannot output all buffered objects, flush raises
Failure *)methodflush:unit->unitmethodput:'a->unitend(** Convert stream to obj_input_channel *)class['a]channel_of_stream:'aStream.t->['a]obj_input_channel(** Convert obj_input_channel to stream *)valstream_of_channel:'a#obj_input_channel->'aStream.t(** Character(byte) input channel. Have the same interface of octet
input channel of http://www.ocaml-programming.de/rec/IO-Classes.html
All channels of Camomile having this interface must conform to the
behaviour defined in the recommendation above. In addition, all
channels are assumed to be blocking. If you supply a non-blocking
channel to Camomile API, the outcome is undefined.
*)classtypechar_input_channel=objectmethodinput:Bytes.t->int->int->intmethodclose_in:unit->unitend(** Character(byte) output channel. Have the same interface of octet
output channel of http://www.ocaml-programming.de/rec/IO-Classes.html
All channels of Camomile having this interface must conform to the
behaviour defined in the recommendation above. In addition, all
channels are assumed to be blocking. If you supply a non-blocking
channel to Camomile API, the outcome is undefined.
*)classtypechar_output_channel=objectmethodoutput:Bytes.t->int->int->intmethodflush:unit->unitmethodclose_out:unit->unitend(** Convert a polymorphic input channel to a character input channel *)classchar_input_channel_of:char#obj_input_channel->char_input_channel(** Convert a character input channel to a polymorphic input channel*)classchar_obj_input_channel_of:char_input_channel->[char]obj_input_channel(** Convert a polymorphic output channel to a character output channel *)classchar_output_channel_of:char#obj_output_channel->char_output_channel(** Convert a character output channel to a polymorphic output channel *)classchar_obj_output_channel_of:char_output_channel->[char]obj_output_channel(** Convert an OCaml input channel to an OO-based character input channel *)classof_in_channel:Pervasives.in_channel->char_input_channel(** Convert an OCaml output channel to an OO-based character output channel *)classof_out_channel:Pervasives.out_channel->char_output_channel# 60 "camomileLibrary.mlp"endmoduleUChar:sig# 1 "Camomile/public/uChar.mli"# 1 "Camomile/public/uChar.mli"(** Unicode (ISO-UCS) characters.
This module implements Unicode (actually ISO-UCS) characters. All
31-bit code points are allowed.
*)(* Copyright (C) 2002, 2003, 2004 Yamagata Yoriyuki. *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. *)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)(** Unicode characters. All 31bit code points are allowed.*)typetexceptionOut_of_range(** [char_of u] returns the Latin-1 representation of [u].
If [u] can not be represented by Latin-1, raises Out_of_range *)valchar_of:t->char(** [of_char c] returns the Unicode character of the Latin-1 character [c] *)valof_char:char->t(** [code u] returns the Unicode code number of [u].
If the value can not be represented by a positive integer,
raise Out_of_range *)valcode:t->int(** [chr n] returns the Unicode character with the code number [n].
If n >= 2^32 or n < 0, raises [invalid_arg] *)valchr:int->t(** [uint_code u] returns the Unicode code number of [u].
The returned int is unsigned, that is, on 32-bits platforms,
the sign bit is used for storing the 31-th bit of the code number. *)externaluint_code:t->int="%identity"(** [chr_of_uint n] returns the Unicode character of the code number [n].
[n] is interpreted as unsigned, that is, on 32-bits platforms,
the sign bit is treated as the 31-th bit of the code number.
If n exceed 31-bits values, then raise [invalid_arg]. *)valchr_of_uint:int->t(** Equality by code point comparison *)valeq:t->t->bool(** [compare u1 u2] returns,
a value > 0 if [u1] has a larger Unicode code number than [u2],
0 if [u1] and [u2] are the same Unicode character,
a value < 0 if [u1] has a smaller Unicode code number than [u2]. *)valcompare:t->t->int(** Aliases of [type t] *)typeuchar=t(** Alias of [uint_code] *)valint_of:uchar->int(** Alias of [chr_of_uint] *)valof_int:int->uchar# 64 "camomileLibrary.mlp"endmoduleUSet:sig# 1 "Camomile/public/uSet.mli"# 1 "Camomile/public/uSet.mli"(** Sets of Unicode characters, implemented as sets of intervals.
The signature is mostly same to Set.S in stdlib *)(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. *)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)typetvalempty:tvalis_empty:t->boolvalmem:UChar.t->t->boolvaladd:UChar.t->t->t(** [add_range u1 u2 s] adds the characters in the range [u1] - [u2]
to [s]. The range is determined by the code point order. *)valadd_range:UChar.t->UChar.t->t->tvalsingleton:UChar.t->tvalremove:UChar.t->t->t(** [remove_range u1 u2 s] removes the characters in the range [u1] - [u2]
from [s]. The range is determined by the code point order. *)valremove_range:UChar.t->UChar.t->t->tvalunion:t->t->tvalinter:t->t->tvaldiff:t->t->t(** [compl s] returns the complement of [s]. *)valcompl:t->tvalcompare:t->t->intvalequal:t->t->boolvalsubset:t->t->bool(** [from u s] returns the set of elements of [s]
whose code points are equal or greater than [u]. *)valfrom:UChar.t->t->t(** [after u s] returns the set of elements of [s]
whose code points are greater than [u]. *)valafter:UChar.t->t->t(** [until u s] returns the set of elements of [s]
whose code points are equal or smaller than [u]. *)valuntil:UChar.t->t->t(** [before u s] returns the set of elements of [s]
whose code points are smaller than [u]. *)valbefore:UChar.t->t->tvaliter:(UChar.t->unit)->t->unit(** [iter_range proc s] feeds the intervals contained in [s] to
[proc] in increasing order. The intervals given to [proc]
are always separated by the character not in [s]. *)valiter_range:(UChar.t->UChar.t->unit)->t->unitvalfold:(UChar.t->'a->'a)->t->'a->'a(** [fold_range f s x] is equivalent to
[f u_i u_(i+1) (... (f u_3 u_4 (f u_1 u_2 x)))] if [s] consists of
the intervals [u1]-[u2], [u3]-[u4], ..., [u_i]-[u_(i + 1)]
in increasing order. The intervals given to [f]
are always separated by the character not in [s]. *)valfold_range:(UChar.t->UChar.t->'a->'a)->t->'a->'avalfor_all:(UChar.t->bool)->t->boolvalexists:(UChar.t->bool)->t->boolvalfilter:(UChar.t->bool)->t->tvalpartition:(UChar.t->bool)->t->t*tvalcardinal:t->intvalelements:t->UChar.tlist(** The list of the intervals contained in the set.
The returned intervals are always separated
by the character not in [s]. *)valranges:t->(UChar.t*UChar.t)listvalmin_elt:t->UChar.tvalmax_elt:t->UChar.t(** Returns an element roughly in the middle of the set.
It is not guaranteed to return the same element for
the sets with the same elements *)valchoose:t->UChar.tvaluset_of_iset:ISet.t->tvaliset_of_uset:t->ISet.t# 68 "camomileLibrary.mlp"endmoduleUMap:sig# 1 "Camomile/public/uMap.mli"# 1 "Camomile/public/uMap.mli"(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. 
*)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)(** Maps over Unicode characters. *)type'atvalempty:'atvalis_empty:'at->bool(** [add ?eq u v m] returns the new map which is same to [m]
except it maps [u] to some value [v'] which satisfies [eq v v'].
If [eq] is not supplied, structural equality is used. *)valadd:?eq:('a->'a->bool)->UChar.t->'a->'at->'at(** [add_range ?eq u1 u2 v m] returns the new map which is same to [m]
except it maps characters in the range [u1]-[u2]
to some value [v'] which satisfies [eq v v'].
If [eq] is not supplied, structural equality is used. *)valadd_range:?eq:('a->'a->bool)->UChar.t->UChar.t->'a->'at->'atvalfind:UChar.t->'at->'avalremove:UChar.t->'at->'at(** [remove_range u1 u2 m] removes [u1]-[u2] from the domain of [m] *)valremove_range:UChar.t->UChar.t->'at->'at(** [from u m] restricts the domain of [m] to the characters whose
code points are equal or greater than [u]. *)valfrom:UChar.t->'at->'at(** [after u m] restricts the domain of [m] to the characters whose
code points are greater than [u]. *)valafter:UChar.t->'at->'at(** [until u m] restricts the domain of [m] to the characters whose
code points are equal or smaller than [u]. *)valuntil:UChar.t->'at->'at(** [before u m] restricts the domain of [m] to the characters whose
code points are smaller than [u]. *)valbefore:UChar.t->'at->'atvalmem:UChar.t->'at->boolvaliter:(UChar.t->'a->unit)->'at->unit(** [iter_range proc m] : For each contiguous region [u1]-[u2]
that is mapped to a constant [v], [proc u1 u2 v] is called.
The order of call is determined by increasing order on [u1]. *)valiter_range:(UChar.t->UChar.t->'a->unit)->'at->unit(** [map ?eq f m] and [mapi ?eq f m] : Similar to [map] and [mapi]
in stdlib Map, but if the map [m'] is returned, it is only guaranteed
that [eq (find u m') (f (find u m ))] is true for [map] and
[eq (find u m') (f u (find u m ))] is true for [mapi]. If [eq] is
not specified, structural equality is used. *)valmap:?eq:('b->'b->bool)->('a->'b)->'at->'btvalmapi:?eq:('b->'b->bool)->(UChar.t->'a->'b)->'at->'btvalfold:(UChar.t->'b->'a->'a)->'bt->'a->'a(** [fold_range f m x] is equivalent to
[f u_(2n) u_(2n+1) v_n (... (f u_1 u_2 v_1 x))] where all characters in
the range [u_(2k)]-[u_(2k+1)] are mapped to [v_k] and
[u_1] < [u_3] < ... in code point order.
For each range [u_(2k)]-[u_(2k+1)] is separated by a character
which is not mapped to [v_k]. *)valfold_range:(UChar.t->UChar.t->'b->'a->'a)->'bt->'a->'a(** Constant map.*)valset_to_map:USet.t->'a->'at(** Domain. *)valdomain:'at->USet.t(** [map_to_set p m] returns the set of characters which are mapped
to values satisfying the predicate [p] by [m]. *)valmap_to_set:('a->bool)->'at->USet.tvalumap_of_imap:'aIMap.t->'atvalimap_of_umap:'at->'aIMap.t# 72 "camomileLibrary.mlp"endmoduleUCharTbl:sig# 1 "Camomile/public/uCharTbl.mli"# 1 "Camomile/public/uCharTbl.mli"(** Fast lookup tables for Unicode. Accessible by constant time. *)(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. distributed with LGPL *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. 
*)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)(** Fast lookup tables. Accessible by constant time. *)type'atbltype'at='atblvalget:'atbl->UChar.t->'amoduletypeType=sigtypeelttypet=elttblvalget:elttbl->UChar.t->elt(** [of_map def m] creates the table which has the same value to [m].
The table returns [def] for the characters for which [m] is undefined. *)valof_map:elt->eltUMap.t->tend(** Equality and hash are necessary for table generation. *)moduleMake:functor(H:Hashtbl.HashedType)->(Typewithtypeelt=H.t)(** Tables for boolean values. *)moduleBool:sigtypetvalget:t->UChar.t->boolvalof_set:USet.t->tend(** Tables for small (< 256, >=0) integers *)moduleBits:sigtypetvalof_map:int->intUMap.t->tvalget:t->UChar.t->intend(** Tables for integers. If integers are not span the whole 31-bit or
63-bit values, [Bytes.t] is more space efficient than [int tbl]. *)moduleBytes:sigtypetvalof_map:int->intUMap.t->tvalget:t->UChar.t->intend(** Tables for bytes. *)moduleChar:sigtypetvalof_map:char->charUMap.t->tvalget:t->UChar.t->charend# 76 "camomileLibrary.mlp"endmoduleUnicodeString:sig# 1 "Camomile/public/unicodeString.mli"# 1 "Camomile/public/unicodeString.mli"(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. distributed with LGPL *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. 
*)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)(** Signature for Unicode strings.
{!UText}, {!XString}, {!UTF8}, {!UTF16}, {!UCS4}
have matched signatures to UStorage
and satisfy the semantics described below. If users want to supply
their own Unicode strings, please design the module with the
following signature and properties. *)moduletypeType=sig(** The type of string. *)typet(** [get t i] : [i]-th character of the storage.*)valget:t->int->UChar.t(** [init len f] creates a new storage.
the returned storage has length [len], its nth-element is [f n].
[f] is called with integers [0 ... len - 1], only once for each integer.
The call is in the increasing order f 0, f 1, f 2, ... *)valinit:int->(int->UChar.t)->t(** The number of Unicode characters in the storage *)vallength:t->int(** locations in storages.*)typeindex(** [look t i] : The character in the location [i] of [t].*)vallook:t->index->UChar.t(** [nth t n] : the location of the [n]-th character in [t].*)valnth:t->int->index(** [next x i, prev x i] :
The operation is valid if [i] points the valid element, i.e. the
returned value may point the location beyond valid elements by one.
If [i] does not point a valid element, the results are unspecified. *)valnext:t->index->indexvalprev:t->index->index(* [out_of_range t i] tests whether [i] is outside of [t]. *)valout_of_range:t->index->boolvaliter:(UChar.t->unit)->t->unit(* Code point comparison *)valcompare:t->t->int(** The location of the first character in the storage. *)valfirst:t->index(** The location of the last character in the storage. *)vallast:t->index(** [move t i n] :
if [n] >= 0, then returns [n]-th character after [i] and
otherwise returns -[n]-th character before [i].
If there is no such character, or [i] does not point
a valid character, the result is unspecified. *)valmove:t->index->int->index(** [compare_index t i j] returns
a positive integer if [i] is the location placed after [j] in [t],
0 if [i] and [j] point the same location, and
a negative integer if [i] is the location placed before [j] in [t]. *)valcompare_index:t->index->index->int(** Character buffers. Similar to Buffer. *)moduleBuf:sigtypebuf(** [create n] creates the buffer. [n] is used to determine
the initial size of the buffer. The meaning of [n] differs from
modules to modules. *)valcreate:int->bufvalcontents:buf->tvalclear:buf->unitvalreset:buf->unitvaladd_char:buf->UChar.t->unitvaladd_string:buf->t->unitvaladd_buffer:buf->buf->unitendend# 80 "camomileLibrary.mlp"endmoduleUText:sig# 1 "Camomile/public/uText.mli"# 1 "Camomile/public/uText.mli"(** An implementation of Unicode string. *)(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. 
*)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)(** An implementation of Unicode string.
Internally, it uses integer array.
The semantics matches the description of UStorage. *)(** Phantom type for distinguishing mutability *)typemutability=[`Mutable|`Immutable]type'atexttypeutext=[`Immutable]texttypeustring=[`Mutable]texttypet=utextvalutext_of_ustring:ustring->utextvalustring_of_utext:utext->ustringvalget:'atext->int->UChar.t(** [set s i u] sets the [i]-th character in [s] to [u]. *)valset:ustring->int->UChar.t->unittypeindexvallook:'atext->index->UChar.tvalnth:'atext->int->indexvalfirst:'atext->indexvallast:'atext->indexvalout_of_range:'atext->index->boolvalcompare_index:'atext->index->index->intvalnext:'atext->index->indexvalprev:'atext->index->indexvalmove:'atext->index->int->indexvallength:'atext->int(** Conversion from Latin-1 strings. *)valof_string:string->utextvalinit:int->(int->UChar.t)->utextvalinit_ustring:int->(int->UChar.t)->ustring(** The semantics of these function are similar to
the equivalents of string. *)valmake:int->UChar.t->ustringvalcopy:ustring->ustringvalsub:'atext->int->int->'atextvalfill:ustring->int->int->UChar.t->unitvalblit:'atext->int->ustring->int->int->unitvalappend:'atext->'btext->'atextvaliter:(UChar.t->unit)->'atext->unitvalcompare:'atext->'btext->intmoduleBuf:sigtypebuf(** [create n] creates the buffer which initially can contain
[n] Unicode characters. *)valcreate:int->bufvalcontents:buf->tvalcontents_string:buf->ustringvallength:buf->intvalclear:buf->unitvalreset:buf->unitvaladd_char:buf->UChar.t->unitvaladd_string:buf->'atext->unitvaladd_buffer:buf->buf->unitend# 84 "camomileLibrary.mlp"endmoduleXString:sig# 1 "Camomile/public/xString.mli"# 1 "Camomile/public/xString.mli"(** eXtensible Unicode string.
The semantics matches the description of UStorage.
The detail may be going to change.*)(* Copyright 2002, 2003 Yamagata Yoriyuki. distributed with LGPL *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. 
*)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)typexstringtypet=xstringvalget:xstring->int->UChar.tvalset:xstring->int->UChar.t->unitvallength:xstring->intvalinit:int->(int->UChar.t)->xstringtypeindexvallook:xstring->index->UChar.tvalnth:xstring->int->indexvalfirst:xstring->indexvallast:xstring->indexvalout_of_range:xstring->index->boolvalnext:xstring->index->indexvalprev:xstring->index->indexvalmove:xstring->index->int->indexvalcompare_index:xstring->index->index->intvalmake:?bufsize:int->int->UChar.t->xstringvalclear:xstring->unitvalreset:xstring->unitvalcopy:xstring->xstringvalsub:xstring->int->int->xstringvaladd_char:xstring->UChar.t->unitvaladd_text:xstring->'aUText.text->unitvaladd_xstring:xstring->xstring->unitvalshrink:xstring->int->unitvalappend:xstring->xstring->xstringvalutext_of:xstring->UText.tvalustring_of:xstring->UText.ustringvaliter:(UChar.t->unit)->xstring->unitvalcompare:t->t->intmoduleBuf:sigtypebufvalcreate:int->bufvalcontents:buf->tvallength:buf->intvalclear:buf->unitvalreset:buf->unitvaladd_char:buf->UChar.t->unitvaladd_string:buf->t->unitvaladd_buffer:buf->buf->unitend# 88 "camomileLibrary.mlp"endmoduleSubText:sig# 1 "Camomile/public/subText.mli"# 1 "Camomile/public/subText.mli"(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. distributed with LGPL *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. 
*)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. *)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)(** Sub-texts, parts of original (ur-) texts.
The signature and semantics matches those of UStorage. *)moduletypeType=sigtypetvalget:t->int->UChar.tvalinit:int->(int->UChar.t)->tvallength:t->inttypeindexvallook:t->index->UChar.tvalnth:t->int->indexvalfirst:t->indexvallast:t->indexvalnext:t->index->indexvalprev:t->index->indexvalmove:t->index->int->indexvalout_of_range:t->index->boolvalcompare_index:t->index->index->intvaliter:(UChar.t->unit)->t->unitvalcompare:t->t->intmoduleBuf:sigtypebufvalcreate:int->bufvalcontents:buf->tvalclear:buf->unitvalreset:buf->unitvaladd_char:buf->UChar.t->unitvaladd_string:buf->t->unitvaladd_buffer:buf->buf->unitend(** The type of original texts. *)typeur_text(** The type of indexes of original texts. *)typeur_index(** [refer t i j] returns the part of [t] from [i] until [j].
The character pointed by [j] is not included in the result.
If [j] is equal to [i] or located before [i], the result is
an empty string. *)valrefer:ur_text->ur_index->ur_index->t(** [excerpt t] copies the contents of [t] as a new ur_text. *)valexcerpt:t->ur_text(** [context t] returns the tuple [(s, i, j)] such that
[t = refer s i j]. *)valcontext:t->ur_text*ur_index*ur_index(** Conversion from indexes of sub-texts to ur_texts. *)valur_index_of:t->index->ur_indexendmoduleMake:functor(Text:UnicodeString.Type)->(Typewithtypeur_text=Text.tandtypeur_index=Text.index)# 92 "camomileLibrary.mlp"endmoduleULine:sig# 1 "Camomile/public/uLine.mli"# 1 "Camomile/public/uLine.mli"(** Line IO *)(* Copyright (C) 2003 Yamagata Yoriyuki. distributed with LGPL *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. 
*)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)(** Line I/O, conversion of line separators. *)openOOChannel(** Line separators.
- [`CR] specifies carriage return.
- [`LF] specifies linefeed.
- [`CRLF] specifies the sequence of carriage return and linefeed.
- [`NEL] specifies next line (\u0085).
- [`LS] specifies Unicode line separator (\u2028).
- [`PS] specifies Unicode paragraph separator (\u2029). *)typeseparator=[`CR|`LF|`CRLF|`NEL|`LS|`PS](** [new input separator input_obj] creates the new input channel object
{!OOChannel.obj_input_channel} which reads from [input_obj] and
converts line separators (all of CR, LF, CRLF, NEL, LS, PS) to
[separator]. *)classinput:separator->UChar.t#obj_input_channel->[UChar.t]obj_input_channel(** [new output separator output_obj] creates the new output channel
object {!OOChannel.obj_output_channel} which receives Unicode characters
and converts line separators (all of CR, LF, CRLF, NEL, LS, PS) to
[separator]. *)classoutput:separator->UChar.t#obj_output_channel->[UChar.t]obj_output_channelmoduletypeType=sigtypetext(** [new input_line input_obj] creates the new input channel object
{!OOChannel.obj_input_channel} which reads Unicode characters
from [input_obj] and output lines. All of CR, LF, CRLF, NEL, LS, PS,
as well as FF (formfeed) are recognised as a line separator. *)classinput_line:UChar.t#obj_input_channel->[text]obj_input_channel(** [new output_line ~sp output_obj] create the new output channel object
{!OOChannel.obj_output_channel} which output each line to [output_obj]
using [sp] as a line separator.
If [sp] is omitted, linefeed (LF) is used. *)classoutput_line:?sp:separator->UChar.t#obj_output_channel->[text]obj_output_channelendmoduleMake:functor(Text:UnicodeString.Type)->(Typewithtypetext=Text.t)# 96 "camomileLibrary.mlp"endmoduleLocale:sig# 1 "Camomile/public/locale.mli"# 1 "Camomile/public/locale.mli"(* Copyright (C) 2003 Yamagata Yoriyuki *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. 
*)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)(** Camomile has a locale system similar to Java.
A locale is a string with a form as
"<LANG>_<COUNTRY>_<MODIFIER>..." where <LANG> is
a 2-letter ISO 639 language code, <COUNTRY> is a 2-letter ISO 3166
country code. Some fields may be absent. *)(** Type of locales. *)typet=string(** [read root suffix reader locale]
reads locale information using [reader].
Locale data is supposed to reside in [root] directory with
the name [locale].[suffix].
[reader] takes [in_channel] as an argument and read data from in_channel.
If data is not found, then [reader] should raise Not_found.
If the file is not found or [reader] raises Not_found, then
more generic locales are tried.
For example, if fr_CA.[suffix] is not found, then [read] tries fr.[suffix].
If fr.[suffix] is also not found, then the file [root].[suffix] is tried.
If the data is still not found, then [Not_found] is raised. *)valread:string->string->(in_channel->'a)->string->'a(** [contain loc1 loc2] :
If [loc1] is contained in [loc2] then true otherwise false.
For example, "fr" is contained in "fr_CA" while "en_CA"
does not contain "fr" *)valcontain:string->string->bool# 100 "camomileLibrary.mlp"endmoduleCharEncoding:CharEncoding.InterfacemoduleUTF8:sig# 1 "Camomile/public/uTF8.mli"# 1 "Camomile/public/uTF8.mli"(** UTF-8 encoded Unicode strings. The type is normal string. *)(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. 
*)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)(** UTF-8 encoded Unicode strings. The type is normal string. *)typet=stringexceptionMalformed_code(** [validate s]
succeeds if [s] is valid UTF-8, otherwise raises [Malformed_code].
Other functions assume strings are valid UTF-8, so it is prudent
to test their validity for strings from untrusted origins. *)valvalidate:t->unit(* All functions below assume string are valid UTF-8. If not,
* the result is unspecified. *)(** [get s n] returns [n]-th Unicode character of [s].
The call requires O(n)-time. *)valget:t->int->UChar.t(** [init len f]
returns a new string which contains [len] Unicode characters.
The i-th Unicode character is initialized by [f i] *)valinit:int->(int->UChar.t)->t(** [length s] returns the number of Unicode characters contained in s *)vallength:t->int(** Positions in the string represented by the number of bytes from the head.
The location of the first character is [0] *)typeindex=int(** [nth s n] returns the position of the [n]-th Unicode character.
The call requires O(n)-time *)valnth:t->int->index(** The position of the head of the first Unicode character. *)valfirst:t->index(** The position of the head of the last Unicode character. *)vallast:t->index(** [look s i]
returns the Unicode character of the location [i] in the string [s]. *)vallook:t->index->UChar.t(** [out_of_range s i]
tests whether [i] is a position inside of [s]. *)valout_of_range:t->index->bool(** [compare_index s i1 i2] returns
a value < 0 if [i1] is the position located before [i2],
0 if [i1] and [i2] points the same location,
a value > 0 if [i1] is the position located after [i2]. *)valcompare_index:t->index->index->int(** [next s i]
returns the position of the head of the Unicode character
located immediately after [i].
If [i] is inside of [s], the function always succeeds.
If [i] is inside of [s] and there is no Unicode character after [i],
the position outside [s] is returned.
If [i] is not inside of [s], the behaviour is unspecified. *)valnext:t->index->index(** [prev s i]
returns the position of the head of the Unicode character
located immediately before [i].
If [i] is inside of [s], the function always succeeds.
If [i] is inside of [s] and there is no Unicode character before [i],
the position outside [s] is returned.
If [i] is not inside of [s], the behaviour is unspecified. *)valprev:t->index->index(** [move s i n]
returns the position of the [n]-th Unicode character after [i] if n >= 0,
the position of the [-n]-th Unicode character before [i] if n < 0.
If there is no such character, the result is unspecified. *)valmove:t->index->int->index(** [iter f s]
applies [f] to all Unicode characters in [s].
The order of application is the same as the order
of the Unicode characters in [s]. *)valiter:(UChar.t->unit)->t->unit(** Code point comparison by the lexicographic order.
[compare s1 s2] returns
a positive integer if [s1] > [s2],
0 if [s1] = [s2],
a negative integer if [s1] < [s2]. *)valcompare:t->t->int(** Buffer module for UTF-8 strings *)moduleBuf:sig(** Buffers for UTF-8 strings. *)typebuf(** [create n] creates the buffer with the initial size [n]-bytes. *)valcreate:int->buf(* The rest of functions is similar to the ones of Buffer in stdlib. *)(** [contents buf] returns the contents of the buffer. *)valcontents:buf->t(** Empty the buffer,
but retains the internal storage which was holding the contents *)valclear:buf->unit(** Empty the buffer and de-allocate the internal storage. *)valreset:buf->unit(** Add one Unicode character to the buffer. *)valadd_char:buf->UChar.t->unit(** Add the UTF-8 string to the buffer. *)valadd_string:buf->t->unit(** [add_buffer b1 b2] adds the contents of [b2] to [b1].
The contents of [b2] is not changed. *)valadd_buffer:buf->buf->unitendwithtypebuf=Buffer.t# 106 "camomileLibrary.mlp"endmoduleUTF16:sig# 1 "Camomile/public/uTF16.mli"# 1 "Camomile/public/uTF16.mli"(* Copyright (C) 2002, 2003, Yamagata Yoriyuki. *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. 
*)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)(** UTF-16 encoded string. the type is the bigarray of 16-bit integers.
The characters must be 21-bits code points, and not surrogate points,
0xfffe, 0xffff.
Bigarray.cma or Bigarray.cmxa must be linked when this module is used. *)typet=(int,Bigarray.int16_unsigned_elt,Bigarray.c_layout)Bigarray.Array1.texceptionMalformed_code(** [validate s]
Succeeds if [s] is valid UTF-16, otherwise raises [Malformed_code].
Other functions assume strings are valid UTF-16, so it is prudent
to test their validity for strings from untrusted origins. *)valvalidate:t->unit(** All functions below assume strings are valid UTF-16. If not,
the result is unspecified. *)(** [get s n] returns [n]-th Unicode character of [s].
The call requires O(n)-time. *)valget:t->int->UChar.texceptionOut_of_range(** [init len f]
returns a new string which contains [len] Unicode characters.
The i-th Unicode character is initialized by [f i]
if the character is not representable, raise [Out_of_range]. *)valinit:int->(int->UChar.t)->t(** [length s] returns the number of Unicode characters contained in s *)vallength:t->int(** Positions in the string represented by the number of 16-bit unit
from the head.
The location of the first character is [0] *)typeindex=int(** [nth s n] returns the position of the [n]-th Unicode character.
The call requires O(n)-time *)valnth:t->int->index(** [first s] : The position of the head of the first Unicode character. *)valfirst:t->index(** [last s] : The position of the head of the last Unicode character. *)vallast:t->index(** [look s i]
returns the Unicode character of the location [i] in the string [s]. *)vallook:t->index->UChar.t(** [out_of_range s i] tests whether [i] is inside of [s]. *)valout_of_range:t->index->bool(** [compare_index s i1 i2] returns
- If [i1] is the position located before [i2], a value < 0,
- If [i1] and [i2] points the same location, 0,
- If [i1] is the position located after [i2], a value > 0.
*)valcompare_index:t->index->index->int(** [next s i]
returns the position of the head of the Unicode character
located immediately after [i].
- If [i] is a valid position, the function always succeeds.
- If [i] is a valid position and there is no Unicode character after [i],
the position outside [s] is returned.
- If [i] is not a valid position, the behaviour is undefined.
*)valnext:t->index->index(** [prev s i]
returns the position of the head of the Unicode character
located immediately before [i].
- If [i] is a valid position, the function always succeeds.
- If [i] is a valid position and there is no Unicode character before [i],
the position outside [s] is returned.
- If [i] is not a valid position, the behaviour is undefined.
*)valprev:t->index->index(** [move s i n]
- If n >= 0, returns [n]-th Unicode character after [i].
- If n < 0, returns [-n]-th Unicode character before [i].
- If there is no such character, the result is unspecified.
*)valmove:t->index->int->index(** [iter f s]
Apply [f] to all Unicode characters in [s].
The order of application is same to the order
in the Unicode characters in [s]. *)valiter:(UChar.t->unit)->t->unit(** Code point comparison *)valcompare:t->t->int(** Buffer module for UTF-16 *)moduleBuf:sigtypebuf(** create n : creates the buffer with the initial size [n]. *)valcreate:int->buf(** The rest of functions is similar to the ones of Buffer in stdlib. *)valcontents:buf->tvalclear:buf->unitvalreset:buf->unit(** if the character is not representable, raise Out_of_range *)valadd_char:buf->UChar.t->unitvaladd_string:buf->t->unitvaladd_buffer:buf->buf->unitend# 110 "camomileLibrary.mlp"endmoduleUCS4:sig# 1 "Camomile/public/uCS4.mli"# 1 "Camomile/public/uCS4.mli"(** UCS4 encoded string. The type is the bigarray of 32-bit integers.
Bigarray.cma or Bigarray.cmxa must be linked when this module is used. *)(* Copyright (C) 2002, 2003, 2004 Yamagata Yoriyuki. *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. *)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)typet=(int32,Bigarray.int32_elt,Bigarray.c_layout)Bigarray.Array1.texceptionMalformed_code(** [validate s]
Succeeds if [s] is valid UCS4, otherwise raises [Malformed_code].
Other functions assume strings are valid UCS4, so it is prudent
to test their validity for strings from untrusted origins. *)valvalidate:t->unit(** All functions below assume strings are valid UCS4. If not,
the result is unspecified. *)(** [get s n] returns [n]-th Unicode character of [s]. *)valget:t->int->UChar.t(** [init len f]
returns a new string which contains [len] Unicode characters.
The i-th Unicode character is initialised by [f i] *)valinit:int->(int->UChar.t)->t(** [length s] returns the number of Unicode characters contained in [s] *)vallength:t->int(** Positions in the string represented by the number of characters
from the head.
The location of the first character is [0] *)typeindex=int(** [nth s n] returns the position of the [n]-th Unicode character.
The call requires O(n)-time *)valnth:t->int->index(** [first s] : The position of the head of the first Unicode character. *)valfirst:t->index(** [last s] : The position of the head of the last Unicode character. *)vallast:t->index(** [look s i]
returns the Unicode character of the location [i] in the string [s]. *)vallook:t->index->UChar.t(** [out_of_range s i]
tests whether [i] points the valid position of [s]. *)valout_of_range:t->index->bool(** [compare_index s i1 i2] returns
If [i1] is the position located before [i2], a value < 0,
If [i1] and [i2] points the same location, 0,
If [i1] is the position located after [i2], a value > 0. *)valcompare_index:t->index->index->int(** [next s i]
returns the position of the head of the Unicode character
located immediately after [i].
If [i] is a valid position, the function always succeeds.
If [i] is a valid position and there is no Unicode character after [i],
the position outside [s] is returned.
If [i] is not a valid position, the behaviour is undefined. *)valnext:t->index->index(** [prev s i]
returns the position of the head of the Unicode character
located immediately before [i].
If [i] is a valid position, the function always succeeds.
If [i] is a valid position and there is no Unicode character before [i],
the position outside [s] is returned.
If [i] is not a valid position, the behaviour is undefined. *)valprev:t->index->index(** [move s i n] :
If n >= 0, returns [n]-th Unicode character after [i].
If n < 0, returns [-n]-th Unicode character before [i].
If there is no such character, the result is unspecified. *)valmove:t->index->int->index(** [iter f s] :
Apply [f] to all Unicode characters in [s].
The order of application is same to the order
in the Unicode characters in [s]. *)valiter:(UChar.t->unit)->t->unit(** Code point comparison *)valcompare:t->t->int(** Buffer module for UCS4 *)moduleBuf:sigtypebuf(** [create n] creates the buffer with the initial size [n]. *)valcreate:int->buf(** The rest of functions is similar to the ones of Buffer in stdlib. *)valcontents:buf->tvalclear:buf->unitvalreset:buf->unitvaladd_char:buf->UChar.t->unitvaladd_string:buf->t->unitvaladd_buffer:buf->buf->unitend# 114 "camomileLibrary.mlp"endmoduleUPervasives:sig# 1 "Camomile/public/uPervasives.mli"# 1 "Camomile/public/uPervasives.mli"(** Functions for toplevel *)(* Copyright (C) 2002, 2003 Yamagata Yoriyuki. *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . 
*)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. *)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)typeuchar=UChar.t(** Aliases for UChar.uint_code, UChar.chr_of_uint *)valint_of_uchar:uchar->intvaluchar_of_int:int->ucharvalescaped_uchar:uchar->stringvalescaped_utf8:string->stringvalprinter_utf8:Format.formatter->string->unitvalprinter_uchar:Format.formatter->uchar->unit# 118 "camomileLibrary.mlp"endmoduleURe:sig# 1 "Camomile/public/uRe.mli"# 1 "Camomile/public/uRe.mli"(** Regular expression engine. *)(* Copyright (C) 2003 Yamagata Yoriyuki. distributed with LGPL *)(* This library is free software; you can redistribute it and/or *)(* modify it under the terms of the GNU Lesser General Public License *)(* as published by the Free Software Foundation; either version 2 of *)(* the License, or (at your option) any later version. *)(* As a special exception to the GNU Library General Public License, you *)(* may link, statically or dynamically, a "work that uses this library" *)(* with a publicly distributed version of this library to produce an *)(* executable file containing portions of this library, and distribute *)(* that executable file under terms of your choice, without any of the *)(* additional requirements listed in clause 6 of the GNU Library General *)(* Public License. 
By "a publicly distributed version of this library", *)(* we mean either the unmodified Library as distributed by the authors, *)(* or a modified version of this library that is distributed under the *)(* conditions defined in clause 3 of the GNU Library General Public *)(* License. This exception does not however invalidate any other reasons *)(* why the executable file might be covered by the GNU Library General *)(* Public License . *)(* This library is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *)(* Lesser General Public License for more details. *)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this library; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 *)(* USA *)(* You can contact the authour by sending email to *)(* yori@users.sourceforge.net *)(** Abstract syntax trees of regular expressions. *)typeregexp=[`Altofregexp*regexp|`Seqofregexp*regexp|`Repofregexp|`Repnofregexp*int*intoption|`Afterofregexp|`Beforeofregexp|`Epsilon|`Groupofregexp|`OneChar|`StringofUChar.tlist|`SetofUSet.t|`BoS|`EoS](** Match semantics. *)typematch_semantics=[`First|`Shortest|`Longest](** Remove [`Group] from the regular expressions. *)valno_group:regexp->regexpmoduletypeType=sigtypetexttypeindextypecompiled_regexpmoduleSubText:SubText.Typewithtypeur_text=textandtypeur_index=index(** Compile regular expressions. *)valcompile:regexp->compiled_regexp(** [regexp_match ?sem r t i] tries matching [r] and substrings
of [t] beginning from [i]. If the match succeeds, [Some g] is
returned where [g] is the array containing the matched
string of the [n]-th group in the [n]-th element.
The matched string of the whole [r] is stored in the [0]-th element.
If matching fails, [None] is returned. *)valregexp_match:?sem:match_semantics->compiled_regexp->text->index->SubText.toptionarrayoption(** [string_match r t i] tests whether [r] can match a substring
of [t] beginning from [i]. *)valstring_match:compiled_regexp->text->index->bool(** [search_forward ?sem r t i] searches a substring of [t]
matching [r] from [i]. The returned value is similar to
{!URe.Type.regexp_match}. *)valsearch_forward:?sem:match_semantics->compiled_regexp->text->index->SubText.toptionarrayoptionendmoduleMake:functor(Text:UnicodeString.Type)->Typewithtypetext=Text.tandtypeindex=Text.index# 122 "camomileLibrary.mlp"endmoduleUCharInfo:UCharInfo.TypemoduleUNF:sigmoduletypeType=UNF.TypemoduleMake(Text:UnicodeString.Type):Typewithtypetext=Text.tandtypeindex=Text.indexendmoduleUCol:sig(** How variables are handled *)typevariable_option=[`Blanked|`Non_ignorable|`Shifted|`Shift_Trimmed](** Strength of comparison. For European languages, each strength
roughly means the following:
`Primary : Ignore accents and case
`Secondary : Ignore case but accents are counted in.
`Tertiary : Accents and case are counted in.
For the case of `Shifted, `Shift_Trimmed, there is the fourth strength.
`Quaternary : Variables such as - (hyphen) are counted in. *)
    type precision = [ `Primary | `Secondary | `Tertiary | `Quaternary ]

    module type Type = UCol.Type

    module Make (Text : UnicodeString.Type) :
      Type with type text = Text.t and type index = Text.index
  end

  module CaseMap : sig
    module type Type = CaseMap.Type

    module Make (Text : UnicodeString.Type) : (Type with type text = Text.t)
  end

  module UReStr : UReStr.Interface

  module StringPrep : sig
    module type Type = StringPrep.Type

    module Make (Text : UnicodeString.Type) : (Type with type text = Text.t)
  end
end

(** [Make (Config)] builds the all-in-one library structure from a single
    configuration module.

    Sub-modules that are bound without [Config] below ([OOChannel], [UChar],
    [USet], [UMap], [UCharTbl], [UnicodeString], [UText], [XString],
    [SubText], [ULine], [Locale], [UTF8], [UTF16], [UCS4], [UPervasives],
    [URe]) are plain aliases of the corresponding top-level modules.

    Sub-modules that take [Config] ([CharEncoding], [UCharInfo], [UNF],
    [UCol], [CaseMap], [UReStr], [StringPrep]) are obtained by applying the
    underlying functor to [Config], so the configuration is supplied exactly
    once, here. *)
module Make (Config : ConfigInt.Type) = struct
  (* Plain re-exports: these are bound without reference to [Config]. *)
  module OOChannel = OOChannel
  module UChar = UChar
  module USet = USet
  module UMap = UMap
  module UCharTbl = UCharTbl
  module UnicodeString = UnicodeString
  module UText = UText
  module XString = XString
  module SubText = SubText
  module ULine = ULine
  module Locale = Locale

  (* Character encodings are instantiated from [Config]. *)
  module CharEncoding = CharEncoding.Configure (Config)

  module UTF8 = UTF8
  module UTF16 = UTF16
  module UCS4 = UCS4
  module UPervasives = UPervasives
  module URe = URe

  (* Character property tables are instantiated from [Config]. *)
  module UCharInfo = UCharInfo.Make (Config)

  (* [UNF.Make] is left partially applied: callers still supply the text
     module, while [Config] is fixed here. *)
  module UNF = struct
    module type Type = UNF.Type

    module Make = UNF.Make (Config)
  end

  (* Same partial-application pattern as [UNF]; the option types are
     restated so that they are visible through this structure. *)
  module UCol = struct
    type variable_option =
      [ `Blanked | `Non_ignorable | `Shifted | `Shift_Trimmed ]

    type precision = [ `Primary | `Secondary | `Tertiary | `Quaternary ]

    module type Type = UCol.Type

    module Make = UCol.Make (Config)
  end

  (* Same partial-application pattern as [UNF]. *)
  module CaseMap = struct
    module type Type = CaseMap.Type

    module Make = CaseMap.Make (Config)
  end

  module UReStr = UReStr.Configure (Config)

  (* Same partial-application pattern as [UNF]. *)
  module StringPrep = struct
    module type Type = StringPrep.Type

    module Make = StringPrep.Make (Config)
  end
end