(* The package sedlex is released under the terms of an MIT-like license. *)
(* See the attached LICENSE file. *)
(* Copyright 2005, 2013 by Alain Frisch and LexiFi. *)

(* Character sets are represented as lists of intervals. The
   intervals must be non-overlapping and not collapsible (two
   consecutive intervals are never adjacent), and the list must be
   ordered in increasing order. *)
type t = (int * int) list

(* Largest code point handled. *)
let max_code = 0x10ffff (* must be < max_int *)

(* Smallest code handled; -1 is the pseudo-character used for [eof]. *)
let min_code = -1

(* The empty set. *)
let empty = []

(* [singleton i] is the set containing only the code [i]. *)
let singleton i = [(i, i)]

(* [is_empty c] is [true] iff [c] contains no interval. *)
let is_empty = function [] -> true | _ -> false

(* [interval i j] is the set of all codes between [i] and [j], both
   included.  The bounds may be given in either order. *)
let interval i j = if i <= j then [(i, j)] else [(j, i)]

(* The set containing only the end-of-file pseudo-character (-1). *)
let eof = singleton (-1)

(* Every code from 0 to [max_code]; [eof] is not included. *)
let any = interval 0 max_code

(* [union c1 c2] merges two sets.  Both arguments must satisfy the
   representation invariant stated on [t]. *)
let rec union c1 c2 =
  match (c1, c2) with
    | [], _ -> c2
    | _, [] -> c1
    | ((i1, j1) as s1) :: r1, (i2, j2) :: r2 ->
        if i1 <= i2 then
          if j1 + 1 < i2 then
            (* [s1] lies strictly before [(i2, j2)] and is not adjacent
               to it: it can be emitted as is. *)
            s1 :: union r1 c2
          else if j1 < j2 then
            (* Overlapping or adjacent: extend the head of [c2] to the
               left and drop [s1]. *)
            union r1 ((i1, j2) :: r2)
          else
            (* [(i2, j2)] is entirely covered by [s1]. *)
            union c1 r2
        else
          (* Swap the arguments so that the interval with the smaller
             lower bound is always examined first. *)
          union c2 c1

(* [of_list l] builds a set from an arbitrary list of intervals:
   entries may be unordered, overlapping, adjacent or duplicated; the
   result satisfies the representation invariant of [t].
   Raises [Invalid_argument] if an entry has its lower bound above its
   upper bound, so that typos in the literal tables below are detected
   instead of silently corrupting the set. *)
let of_list l =
  List.fold_left
    (fun acc (i, j) ->
      if i > j then invalid_arg "of_list: interval bounds are reversed";
      union acc [(i, j)])
    empty l

(* [complement c] is the set of all codes between -1 ([eof]) and
   [max_code] that are not in [c]. *)
let complement c =
  (* [aux start l] emits the gaps of [l], beginning at code [start]. *)
  let rec aux start = function
    | [] -> if start <= max_code then [(start, max_code)] else []
    | (i, j) :: l -> (start, i - 1) :: aux (succ j) l
  in
  match c with
    | (-1, j) :: l ->
        (* [c] starts at the lowest code: there is no gap before its
           first interval. *)
        aux (succ j) l
    | l -> aux (-1) l

(* [intersection c1 c2], computed through De Morgan's law. *)
let intersection c1 c2 = complement (union (complement c1) (complement c2))

(* [difference c1 c2] is the set of codes that are in [c1] but not in
   [c2]. *)
let difference c1 c2 = complement (union (complement c1) c2)

(* Unicode classes from XML *)

(* The literal tables below go through [of_list] because some of them
   list adjacent intervals separately (e.g. 0x00F8-0x00FF followed by
   0x0100-0x0131), which would otherwise break the invariant that
   [union] and [complement] rely on. *)

let base_char =
  of_list
    [
      (0x0041, 0x005A); (0x0061, 0x007A); (0x00C0, 0x00D6); (0x00D8, 0x00F6);
      (0x00F8, 0x00FF); (0x0100, 0x0131); (0x0134, 0x013E); (0x0141, 0x0148);
      (0x014A, 0x017E); (0x0180, 0x01C3); (0x01CD, 0x01F0); (0x01F4, 0x01F5);
      (0x01FA, 0x0217); (0x0250, 0x02A8); (0x02BB, 0x02C1); (0x0386, 0x0386);
      (0x0388, 0x038A); (0x038C, 0x038C); (0x038E, 0x03A1); (0x03A3, 0x03CE);
      (0x03D0, 0x03D6); (0x03DA, 0x03DA); (0x03DC, 0x03DC); (0x03DE, 0x03DE);
      (0x03E0, 0x03E0); (0x03E2, 0x03F3); (0x0401, 0x040C); (0x040E, 0x044F);
      (0x0451, 0x045C); (0x045E, 0x0481); (0x0490, 0x04C4); (0x04C7, 0x04C8);
      (0x04CB, 0x04CC); (0x04D0, 0x04EB); (0x04EE, 0x04F5); (0x04F8, 0x04F9);
      (0x0531, 0x0556); (0x0559, 0x0559); (0x0561, 0x0586); (0x05D0, 0x05EA);
      (0x05F0, 0x05F2); (0x0621, 0x063A); (0x0641, 0x064A); (0x0671, 0x06B7);
      (0x06BA, 0x06BE); (0x06C0, 0x06CE); (0x06D0, 0x06D3); (0x06D5, 0x06D5);
      (0x06E5, 0x06E6); (0x0905, 0x0939); (0x093D, 0x093D); (0x0958, 0x0961);
      (0x0985, 0x098C); (0x098F, 0x0990); (0x0993, 0x09A8); (0x09AA, 0x09B0);
      (0x09B2, 0x09B2); (0x09B6, 0x09B9); (0x09DC, 0x09DD); (0x09DF, 0x09E1);
      (0x09F0, 0x09F1); (0x0A05, 0x0A0A); (0x0A0F, 0x0A10); (0x0A13, 0x0A28);
      (0x0A2A, 0x0A30); (0x0A32, 0x0A33); (0x0A35, 0x0A36); (0x0A38, 0x0A39);
      (0x0A59, 0x0A5C); (0x0A5E, 0x0A5E); (0x0A72, 0x0A74); (0x0A85, 0x0A8B);
      (0x0A8D, 0x0A8D); (0x0A8F, 0x0A91); (0x0A93, 0x0AA8); (0x0AAA, 0x0AB0);
      (0x0AB2, 0x0AB3); (0x0AB5, 0x0AB9); (0x0ABD, 0x0ABD); (0x0AE0, 0x0AE0);
      (0x0B05, 0x0B0C); (0x0B0F, 0x0B10); (0x0B13, 0x0B28); (0x0B2A, 0x0B30);
      (0x0B32, 0x0B33); (0x0B36, 0x0B39); (0x0B3D, 0x0B3D); (0x0B5C, 0x0B5D);
      (0x0B5F, 0x0B61); (0x0B85, 0x0B8A); (0x0B8E, 0x0B90); (0x0B92, 0x0B95);
      (0x0B99, 0x0B9A); (0x0B9C, 0x0B9C); (0x0B9E, 0x0B9F); (0x0BA3, 0x0BA4);
      (0x0BA8, 0x0BAA); (0x0BAE, 0x0BB5); (0x0BB7, 0x0BB9); (0x0C05, 0x0C0C);
      (0x0C0E, 0x0C10); (0x0C12, 0x0C28); (0x0C2A, 0x0C33); (0x0C35, 0x0C39);
      (0x0C60, 0x0C61); (0x0C85, 0x0C8C); (0x0C8E, 0x0C90); (0x0C92, 0x0CA8);
      (0x0CAA, 0x0CB3); (0x0CB5, 0x0CB9); (0x0CDE, 0x0CDE); (0x0CE0, 0x0CE1);
      (0x0D05, 0x0D0C); (0x0D0E, 0x0D10); (0x0D12, 0x0D28); (0x0D2A, 0x0D39);
      (0x0D60, 0x0D61); (0x0E01, 0x0E2E); (0x0E30, 0x0E30); (0x0E32, 0x0E33);
      (0x0E40, 0x0E45); (0x0E81, 0x0E82); (0x0E84, 0x0E84); (0x0E87, 0x0E88);
      (0x0E8A, 0x0E8A); (0x0E8D, 0x0E8D); (0x0E94, 0x0E97); (0x0E99, 0x0E9F);
      (0x0EA1, 0x0EA3); (0x0EA5, 0x0EA5); (0x0EA7, 0x0EA7); (0x0EAA, 0x0EAB);
      (0x0EAD, 0x0EAE); (0x0EB0, 0x0EB0); (0x0EB2, 0x0EB3); (0x0EBD, 0x0EBD);
      (0x0EC0, 0x0EC4); (0x0F40, 0x0F47); (0x0F49, 0x0F69); (0x10A0, 0x10C5);
      (0x10D0, 0x10F6); (0x1100, 0x1100); (0x1102, 0x1103); (0x1105, 0x1107);
      (0x1109, 0x1109); (0x110B, 0x110C); (0x110E, 0x1112); (0x113C, 0x113C);
      (0x113E, 0x113E); (0x1140, 0x1140); (0x114C, 0x114C); (0x114E, 0x114E);
      (0x1150, 0x1150); (0x1154, 0x1155); (0x1159, 0x1159); (0x115F, 0x1161);
      (0x1163, 0x1163); (0x1165, 0x1165); (0x1167, 0x1167); (0x1169, 0x1169);
      (0x116D, 0x116E); (0x1172, 0x1173); (0x1175, 0x1175); (0x119E, 0x119E);
      (0x11A8, 0x11A8); (0x11AB, 0x11AB); (0x11AE, 0x11AF); (0x11B7, 0x11B8);
      (0x11BA, 0x11BA); (0x11BC, 0x11C2); (0x11EB, 0x11EB); (0x11F0, 0x11F0);
      (0x11F9, 0x11F9); (0x1E00, 0x1E9B); (0x1EA0, 0x1EF9); (0x1F00, 0x1F15);
      (0x1F18, 0x1F1D); (0x1F20, 0x1F45); (0x1F48, 0x1F4D); (0x1F50, 0x1F57);
      (0x1F59, 0x1F59); (0x1F5B, 0x1F5B); (0x1F5D, 0x1F5D); (0x1F5F, 0x1F7D);
      (0x1F80, 0x1FB4); (0x1FB6, 0x1FBC); (0x1FBE, 0x1FBE); (0x1FC2, 0x1FC4);
      (0x1FC6, 0x1FCC); (0x1FD0, 0x1FD3); (0x1FD6, 0x1FDB); (0x1FE0, 0x1FEC);
      (0x1FF2, 0x1FF4); (0x1FF6, 0x1FFC); (0x2126, 0x2126); (0x212A, 0x212B);
      (0x212E, 0x212E); (0x2180, 0x2182); (0x3041, 0x3094); (0x30A1, 0x30FA);
      (0x3105, 0x312C); (0xAC00, 0xD7A3);
    ]

let ideographic = of_list [(0x3007, 0x3007); (0x3021, 0x3029); (0x4E00, 0x9FA5)]

let combining_char =
  of_list
    [
      (0x0300, 0x0345); (0x0360, 0x0361); (0x0483, 0x0486); (0x0591, 0x05A1);
      (0x05A3, 0x05B9); (0x05BB, 0x05BD); (0x05BF, 0x05BF); (0x05C1, 0x05C2);
      (0x05C4, 0x05C4); (0x064B, 0x0652); (0x0670, 0x0670); (0x06D6, 0x06DC);
      (0x06DD, 0x06DF); (0x06E0, 0x06E4); (0x06E7, 0x06E8); (0x06EA, 0x06ED);
      (0x0901, 0x0903); (0x093C, 0x093C); (0x093E, 0x094C); (0x094D, 0x094D);
      (0x0951, 0x0954); (0x0962, 0x0963); (0x0981, 0x0983); (0x09BC, 0x09BC);
      (0x09BE, 0x09BE); (0x09BF, 0x09BF); (0x09C0, 0x09C4); (0x09C7, 0x09C8);
      (0x09CB, 0x09CD); (0x09D7, 0x09D7); (0x09E2, 0x09E3); (0x0A02, 0x0A02);
      (0x0A3C, 0x0A3C); (0x0A3E, 0x0A3E); (0x0A3F, 0x0A3F); (0x0A40, 0x0A42);
      (0x0A47, 0x0A48); (0x0A4B, 0x0A4D); (0x0A70, 0x0A71); (0x0A81, 0x0A83);
      (0x0ABC, 0x0ABC); (0x0ABE, 0x0AC5); (0x0AC7, 0x0AC9); (0x0ACB, 0x0ACD);
      (0x0B01, 0x0B03); (0x0B3C, 0x0B3C); (0x0B3E, 0x0B43); (0x0B47, 0x0B48);
      (0x0B4B, 0x0B4D); (0x0B56, 0x0B57); (0x0B82, 0x0B83); (0x0BBE, 0x0BC2);
      (0x0BC6, 0x0BC8); (0x0BCA, 0x0BCD); (0x0BD7, 0x0BD7); (0x0C01, 0x0C03);
      (0x0C3E, 0x0C44); (0x0C46, 0x0C48); (0x0C4A, 0x0C4D); (0x0C55, 0x0C56);
      (0x0C82, 0x0C83); (0x0CBE, 0x0CC4); (0x0CC6, 0x0CC8); (0x0CCA, 0x0CCD);
      (0x0CD5, 0x0CD6); (0x0D02, 0x0D03); (0x0D3E, 0x0D43); (0x0D46, 0x0D48);
      (0x0D4A, 0x0D4D); (0x0D57, 0x0D57); (0x0E31, 0x0E31); (0x0E34, 0x0E3A);
      (0x0E47, 0x0E4E); (0x0EB1, 0x0EB1); (0x0EB4, 0x0EB9); (0x0EBB, 0x0EBC);
      (0x0EC8, 0x0ECD); (0x0F18, 0x0F19); (0x0F35, 0x0F35); (0x0F37, 0x0F37);
      (0x0F39, 0x0F39); (0x0F3E, 0x0F3E); (0x0F3F, 0x0F3F); (0x0F71, 0x0F84);
      (0x0F86, 0x0F8B); (0x0F90, 0x0F95); (0x0F97, 0x0F97); (0x0F99, 0x0FAD);
      (0x0FB1, 0x0FB7); (0x0FB9, 0x0FB9); (0x20D0, 0x20DC); (0x20E1, 0x20E1);
      (0x302A, 0x302F); (0x3099, 0x3099); (0x309A, 0x309A);
    ]

let digit =
  of_list
    [
      (0x0030, 0x0039); (0x0660, 0x0669); (0x06F0, 0x06F9); (0x0966, 0x096F);
      (0x09E6, 0x09EF); (0x0A66, 0x0A6F); (0x0AE6, 0x0AEF); (0x0B66, 0x0B6F);
      (0x0BE7, 0x0BEF); (0x0C66, 0x0C6F); (0x0CE6, 0x0CEF); (0x0D66, 0x0D6F);
      (0x0E50, 0x0E59); (0x0ED0, 0x0ED9); (0x0F20, 0x0F29);
    ]

let extender =
  of_list
    [
      (0x00B7, 0x00B7); (0x02D0, 0x02D1); (0x0387, 0x0387); (0x0640, 0x0640);
      (0x0E46, 0x0E46); (0x0EC6, 0x0EC6); (0x3005, 0x3005); (0x3031, 0x3035);
      (0x309D, 0x309E); (0x30FC, 0x30FE);
    ]

let blank = of_list [(0x0009, 0x000A); (0x000D, 0x000D); (0x0020, 0x0020)]

let letter = union base_char ideographic

(* Letters to be used in identifiers, as specified
   by ISO ....
   Data provided by John M. Skaller *)

(* Corrections applied to the data as originally transcribed:
   - Cyrillic (0x04c7, 0x04c4) had reversed bounds; it is 0x04c7-0x04c8.
   - Lao (0x0e0d, 0x0e0d) was a typo for 0x0e8d.
   - Arabic Presentation Forms-A (0xfd92, 0xfbc7) had reversed bounds;
     it is 0xfd92-0xfdc7.
   - Redundant entries were dropped: a second (0x04d0, 0x04eb) listed
     under Armenian, (0x11d0, 0x11f6) which is included in
     (0x11a8, 0x11f9), and a second (0xffd2, 0xffd7).
   The table still goes through [of_list], which merges the adjacent
   ranges 0xfb40-0xfb41 and 0xfb42-0xfb44. *)
let tr8876_ident_char =
  of_list
    [
      (* ASCII *)
      (0x0041, 0x005a); (0x0061, 0x007a);
      (* Latin *)
      (0x00c0, 0x00d6); (0x00d8, 0x00f6); (0x00f8, 0x01f5); (0x01fa, 0x0217);
      (0x0250, 0x02a8);
      (* Greek *)
      (0x0384, 0x0384); (0x0388, 0x038a); (0x038c, 0x038c); (0x038e, 0x03a1);
      (0x03a3, 0x03ce); (0x03d0, 0x03d6); (0x03da, 0x03da); (0x03dc, 0x03dc);
      (0x03de, 0x03de); (0x03e0, 0x03e0); (0x03e2, 0x03f3);
      (* Cyrillic *)
      (0x0401, 0x040d); (0x040f, 0x044f); (0x0451, 0x045c); (0x045e, 0x0481);
      (0x0490, 0x04c4); (0x04c7, 0x04c8); (0x04cb, 0x04cc); (0x04d0, 0x04eb);
      (0x04ee, 0x04f5); (0x04f8, 0x04f9);
      (* Armenian *)
      (0x0531, 0x0556); (0x0561, 0x0587);
      (* Hebrew *)
      (0x05d0, 0x05ea); (0x05f0, 0x05f4);
      (* Arabic *)
      (0x0621, 0x063a); (0x0640, 0x0652); (0x0670, 0x06b7); (0x06ba, 0x06be);
      (0x06c0, 0x06ce); (0x06e5, 0x06e7);
      (* Devanagari *)
      (0x0905, 0x0939); (0x0958, 0x0962);
      (* Bengali *)
      (0x0985, 0x098c); (0x098f, 0x0990); (0x0993, 0x09a8); (0x09aa, 0x09b0);
      (0x09b2, 0x09b2); (0x09b6, 0x09b9); (0x09dc, 0x09dd); (0x09df, 0x09e1);
      (0x09f0, 0x09f1);
      (* Gurmukhi *)
      (0x0a05, 0x0a0a); (0x0a0f, 0x0a10); (0x0a13, 0x0a28); (0x0a2a, 0x0a30);
      (0x0a32, 0x0a33); (0x0a35, 0x0a36); (0x0a38, 0x0a39); (0x0a59, 0x0a5c);
      (0x0a5e, 0x0a5e);
      (* Gujarati *)
      (0x0a85, 0x0a8b); (0x0a8d, 0x0a8d); (0x0a8f, 0x0a91); (0x0a93, 0x0aa8);
      (0x0aaa, 0x0ab0); (0x0ab2, 0x0ab3); (0x0ab5, 0x0ab9); (0x0ae0, 0x0ae0);
      (* Oriya *)
      (0x0b05, 0x0b0c); (0x0b0f, 0x0b10); (0x0b13, 0x0b28); (0x0b2a, 0x0b30);
      (0x0b32, 0x0b33); (0x0b36, 0x0b39); (0x0b5c, 0x0b5d); (0x0b5f, 0x0b61);
      (* Tamil *)
      (0x0b85, 0x0b8a); (0x0b8e, 0x0b90); (0x0b92, 0x0b95); (0x0b99, 0x0b9a);
      (0x0b9c, 0x0b9c); (0x0b9e, 0x0b9f); (0x0ba3, 0x0ba4); (0x0ba8, 0x0baa);
      (0x0bae, 0x0bb5); (0x0bb7, 0x0bb9);
      (* Telugu *)
      (0x0c05, 0x0c0c); (0x0c0e, 0x0c10); (0x0c12, 0x0c28); (0x0c2a, 0x0c33);
      (0x0c35, 0x0c39); (0x0c60, 0x0c61);
      (* Kannada *)
      (0x0c85, 0x0c8c); (0x0c8e, 0x0c90); (0x0c92, 0x0ca8); (0x0caa, 0x0cb3);
      (0x0cb5, 0x0cb9); (0x0ce0, 0x0ce1);
      (* Malayalam *)
      (0x0d05, 0x0d0c); (0x0d0e, 0x0d10); (0x0d12, 0x0d28); (0x0d2a, 0x0d39);
      (0x0d60, 0x0d61);
      (* Thai *)
      (0x0e01, 0x0e30); (0x0e32, 0x0e33); (0x0e40, 0x0e46); (0x0e4f, 0x0e5b);
      (* Lao *)
      (0x0e81, 0x0e82); (0x0e84, 0x0e84); (0x0e87, 0x0e88); (0x0e8a, 0x0e8a);
      (0x0e8d, 0x0e8d); (0x0e94, 0x0e97); (0x0e99, 0x0e9f); (0x0ea1, 0x0ea3);
      (0x0ea5, 0x0ea5); (0x0ea7, 0x0ea7); (0x0eaa, 0x0eab); (0x0ead, 0x0eb0);
      (0x0eb2, 0x0eb3); (0x0ebd, 0x0ebd); (0x0ec0, 0x0ec4); (0x0ec6, 0x0ec6);
      (* Georgian *)
      (0x10a0, 0x10c5); (0x10d0, 0x10f6);
      (* Hangul Jamo *)
      (0x1100, 0x1159); (0x1161, 0x11a2); (0x11a8, 0x11f9);
      (* Latin extensions *)
      (0x1e00, 0x1e9a); (0x1ea0, 0x1ef9);
      (* Greek extended *)
      (0x1f00, 0x1f15); (0x1f18, 0x1f1d); (0x1f20, 0x1f45); (0x1f48, 0x1f4d);
      (0x1f50, 0x1f57); (0x1f59, 0x1f59); (0x1f5b, 0x1f5b); (0x1f5d, 0x1f5d);
      (0x1f5f, 0x1f7d); (0x1f80, 0x1fb4); (0x1fb6, 0x1fbc); (0x1fc2, 0x1fc4);
      (0x1fc6, 0x1fcc); (0x1fd0, 0x1fd3); (0x1fd6, 0x1fdb); (0x1fe0, 0x1fec);
      (0x1ff2, 0x1ff4); (0x1ff6, 0x1ffc);
      (* Hiragana *)
      (0x3041, 0x3094); (0x309b, 0x309e);
      (* Katakana *)
      (0x30a1, 0x30fe);
      (* Bopomofo *)
      (0x3105, 0x312c);
      (* CJK Unified Ideographs *)
      (0x4e00, 0x9fa5);
      (* CJK Compatibility Ideographs *)
      (0xf900, 0xfa2d);
      (* Arabic Presentation Forms *)
      (0xfb1f, 0xfb36); (0xfb38, 0xfb3c); (0xfb3e, 0xfb3e); (0xfb40, 0xfb41);
      (0xfb42, 0xfb44); (0xfb46, 0xfbb1); (0xfbd3, 0xfd35);
      (* Arabic Presentation Forms-A *)
      (0xfd50, 0xfd85); (0xfd92, 0xfdc7); (0xfdf0, 0xfdfb);
      (* Arabic Presentation Forms-B *)
      (0xfe70, 0xfe72); (0xfe74, 0xfe74); (0xfe76, 0xfefc);
      (* Half width and Fullwidth Forms *)
      (0xff21, 0xff3a); (0xff41, 0xff5a); (0xff66, 0xffbe); (0xffc2, 0xffc7);
      (0xffca, 0xffcf); (0xffd2, 0xffd7); (0xffda, 0xffdc);
    ]