[25] | 1 | /* |
---|
| 2 | * regc_locale.c -- |
---|
| 3 | * |
---|
| 4 | * This file contains the Unicode locale specific regexp routines. |
---|
| 5 | * This file is #included by regcomp.c. |
---|
| 6 | * |
---|
| 7 | * Copyright (c) 1998 by Scriptics Corporation. |
---|
| 8 | * |
---|
| 9 | * See the file "license.terms" for information on usage and redistribution of |
---|
| 10 | * this file, and for a DISCLAIMER OF ALL WARRANTIES. |
---|
| 11 | * |
---|
| 12 | * RCS: @(#) $Id: regc_locale.c,v 1.20 2007/12/13 15:23:14 dgp Exp $ |
---|
| 13 | */ |
---|
| 14 | |
---|
/*
 * ASCII character-name table.
 *
 * Each entry pairs a symbolic character name with the single character it
 * denotes. Several characters have more than one name (for example
 * "BEL"/"alert", "period"/"full-stop", "backslash"/"reverse-solidus"), so a
 * code may appear in more than one entry. The table is terminated by a
 * {NULL, 0} sentinel; walk it until name == NULL.
 *
 * NOTE(review): presumably consulted when resolving named collating elements
 * in bracket expressions; the lookup code is not part of this section --
 * confirm against the caller.
 */

static const struct cname {
    const char *name;           /* symbolic name; NULL marks end of table */
    const char code;            /* character the name stands for */
} cnames[] = {
    /* Control characters, with their C-escape style aliases. */
    {"NUL",                     '\0'},
    {"SOH",                     '\001'},
    {"STX",                     '\002'},
    {"ETX",                     '\003'},
    {"EOT",                     '\004'},
    {"ENQ",                     '\005'},
    {"ACK",                     '\006'},
    {"BEL",                     '\007'},
    {"alert",                   '\007'},
    {"BS",                      '\010'},
    {"backspace",               '\b'},
    {"HT",                      '\011'},
    {"tab",                     '\t'},
    {"LF",                      '\012'},
    {"newline",                 '\n'},
    {"VT",                      '\013'},
    {"vertical-tab",            '\v'},
    {"FF",                      '\014'},
    {"form-feed",               '\f'},
    {"CR",                      '\015'},
    {"carriage-return",         '\r'},
    {"SO",                      '\016'},
    {"SI",                      '\017'},
    {"DLE",                     '\020'},
    {"DC1",                     '\021'},
    {"DC2",                     '\022'},
    {"DC3",                     '\023'},
    {"DC4",                     '\024'},
    {"NAK",                     '\025'},
    {"SYN",                     '\026'},
    {"ETB",                     '\027'},
    {"CAN",                     '\030'},
    {"EM",                      '\031'},
    {"SUB",                     '\032'},
    {"ESC",                     '\033'},
    {"IS4",                     '\034'},
    {"FS",                      '\034'},
    {"IS3",                     '\035'},
    {"GS",                      '\035'},
    {"IS2",                     '\036'},
    {"RS",                      '\036'},
    {"IS1",                     '\037'},
    {"US",                      '\037'},

    /* Printable ASCII: space, punctuation, and digits. */
    {"space",                   ' '},
    {"exclamation-mark",        '!'},
    {"quotation-mark",          '"'},
    {"number-sign",             '#'},
    {"dollar-sign",             '$'},
    {"percent-sign",            '%'},
    {"ampersand",               '&'},
    {"apostrophe",              '\''},
    {"left-parenthesis",        '('},
    {"right-parenthesis",       ')'},
    {"asterisk",                '*'},
    {"plus-sign",               '+'},
    {"comma",                   ','},
    {"hyphen",                  '-'},
    {"hyphen-minus",            '-'},
    {"period",                  '.'},
    {"full-stop",               '.'},
    {"slash",                   '/'},
    {"solidus",                 '/'},
    {"zero",                    '0'},
    {"one",                     '1'},
    {"two",                     '2'},
    {"three",                   '3'},
    {"four",                    '4'},
    {"five",                    '5'},
    {"six",                     '6'},
    {"seven",                   '7'},
    {"eight",                   '8'},
    {"nine",                    '9'},
    {"colon",                   ':'},
    {"semicolon",               ';'},
    {"less-than-sign",          '<'},
    {"equals-sign",             '='},
    {"greater-than-sign",       '>'},
    {"question-mark",           '?'},
    {"commercial-at",           '@'},
    {"left-square-bracket",     '['},
    {"backslash",               '\\'},
    {"reverse-solidus",         '\\'},
    {"right-square-bracket",    ']'},
    {"circumflex",              '^'},
    {"circumflex-accent",       '^'},
    {"underscore",              '_'},
    {"low-line",                '_'},
    {"grave-accent",            '`'},
    {"left-brace",              '{'},
    {"left-curly-bracket",      '{'},
    {"vertical-line",           '|'},
    {"right-brace",             '}'},
    {"right-curly-bracket",     '}'},
    {"tilde",                   '~'},
    {"DEL",                     '\177'},

    /* End-of-table sentinel. */
    {NULL,                      0}
};
---|
| 118 | |
---|
| 119 | /* |
---|
| 120 | * Unicode character-class tables. |
---|
| 121 | */ |
---|
| 122 | |
---|
/*
 * crange -- one contiguous run of characters, given as a {start, end} pair.
 * The tables below initialize it positionally, so the field order (start
 * first, end second) must not change. The ASCII entries in those tables
 * (e.g. {0x0030, 0x0039} for '0'-'9') show that both endpoints belong to
 * the run. The chr type is declared outside this section.
 */
| 123 | typedef struct crange { |
---|
| 124 | chr start; |
---|
| 125 | chr end; |
---|
| 126 | } crange; |
---|
| 127 | |
---|
| 128 | /* |
---|
| 129 | * Declarations of Unicode character ranges. This code |
---|
| 130 | * is automatically generated by the tools/uniClass.tcl script |
---|
| 131 | * and used in generic/regc_locale.c. Do not modify by hand. |
---|
| 132 | */ |
---|
| 133 | |
---|
| 134 | /* Unicode: alphabetic characters */ |
---|
| 135 | |
---|
/*
 * alphaRangeTable -- {start, end} code-point runs of the Unicode alphabetic
 * class. Both endpoints are members of the run: the first two entries are
 * ASCII 'A'-'Z' (0x0041-0x005a) and 'a'-'z' (0x0061-0x007a). Alphabetic
 * code points that are not part of a run listed here are enumerated
 * individually in alphaCharTable.
 *
 * NUM_ALPHA_RANGE is the number of entries, computed with sizeof so it
 * follows the table automatically.
 *
 * Generated data (see the uniClass.tcl notice earlier in this file):
 * regenerate instead of editing by hand.
 */
| 136 | static const crange alphaRangeTable[] = { |
---|
| 137 | {0x0041, 0x005a}, {0x0061, 0x007a}, {0x00c0, 0x00d6}, {0x00d8, 0x00f6}, |
---|
| 138 | {0x00f8, 0x021f}, {0x0222, 0x0233}, {0x0250, 0x02ad}, {0x02b0, 0x02b8}, |
---|
| 139 | {0x02bb, 0x02c1}, {0x02e0, 0x02e4}, {0x0388, 0x038a}, {0x038e, 0x03a1}, |
---|
| 140 | {0x03a3, 0x03ce}, {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x0481}, |
---|
| 141 | {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0561, 0x0587}, |
---|
| 142 | {0x05d0, 0x05ea}, {0x05f0, 0x05f2}, {0x0621, 0x063a}, {0x0640, 0x064a}, |
---|
| 143 | {0x0671, 0x06d3}, {0x06fa, 0x06fc}, {0x0712, 0x072c}, {0x0780, 0x07a5}, |
---|
| 144 | {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0985, 0x098c}, {0x0993, 0x09a8}, |
---|
| 145 | {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09df, 0x09e1}, {0x0a05, 0x0a0a}, |
---|
| 146 | {0x0a13, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a59, 0x0a5c}, {0x0a72, 0x0a74}, |
---|
| 147 | {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, |
---|
| 148 | {0x0ab5, 0x0ab9}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30}, |
---|
| 149 | {0x0b36, 0x0b39}, {0x0b5f, 0x0b61}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90}, |
---|
| 150 | {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, {0x0bb7, 0x0bb9}, |
---|
| 151 | {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33}, |
---|
| 152 | {0x0c35, 0x0c39}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, |
---|
| 153 | {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, |
---|
| 154 | {0x0d12, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, |
---|
| 155 | {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0e01, 0x0e30}, {0x0e40, 0x0e46}, |
---|
| 156 | {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb0}, |
---|
| 157 | {0x0ec0, 0x0ec4}, {0x0f40, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f88, 0x0f8b}, |
---|
| 158 | {0x1000, 0x1021}, {0x1023, 0x1027}, {0x1050, 0x1055}, {0x10a0, 0x10c5}, |
---|
| 159 | {0x10d0, 0x10f6}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9}, |
---|
| 160 | {0x1200, 0x1206}, {0x1208, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, |
---|
| 161 | {0x125a, 0x125d}, {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, |
---|
| 162 | {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, |
---|
| 163 | {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, |
---|
| 164 | {0x1318, 0x131e}, {0x1320, 0x1346}, {0x1348, 0x135a}, {0x13a0, 0x13f4}, |
---|
| 165 | {0x1401, 0x166c}, {0x166f, 0x1676}, {0x1681, 0x169a}, {0x16a0, 0x16ea}, |
---|
| 166 | {0x1780, 0x17b3}, {0x1820, 0x1877}, {0x1880, 0x18a8}, {0x1e00, 0x1e9b}, |
---|
| 167 | {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, |
---|
| 168 | {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, |
---|
| 169 | {0x1fb6, 0x1fbc}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3}, |
---|
| 170 | {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc}, |
---|
| 171 | {0x210a, 0x2113}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x212f, 0x2131}, |
---|
| 172 | {0x2133, 0x2139}, {0x3031, 0x3035}, {0x3041, 0x3094}, {0x30a1, 0x30fa}, |
---|
| 173 | {0x30fc, 0x30fe}, {0x3105, 0x312c}, {0x3131, 0x318e}, {0x31a0, 0x31b7}, |
---|
| 174 | {0x3400, 0x4db5}, {0x4e00, 0x9fa5}, {0xa000, 0xa48c}, {0xac00, 0xd7a3}, |
---|
| 175 | {0xf900, 0xfa2d}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28}, |
---|
| 176 | {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d}, |
---|
| 177 | {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe72}, |
---|
| 178 | {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe}, |
---|
| 179 | {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc} |
---|
| 180 | }; |
---|
| 181 | |
---|
| 182 | #define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange)) |
---|
| 183 | |
---|
/*
 * alphaCharTable -- individual code points of the Unicode alphabetic class
 * that are not covered by an alphaRangeTable entry. Pairs of adjacent code
 * points are also listed one by one here (e.g. 0x02d0, 0x02d1).
 *
 * NUM_ALPHA_CHAR is the number of entries, computed with sizeof.
 *
 * NOTE(review): the final entry, 0xfffe, is a Unicode noncharacter; it
 * presumably comes straight from the generator -- confirm it is intended.
 *
 * Generated data: regenerate instead of editing by hand.
 */
| 184 | static const chr alphaCharTable[] = { |
---|
| 185 | 0x00aa, 0x00b5, 0x00ba, 0x02d0, 0x02d1, 0x02ee, 0x037a, 0x0386, 0x038c, |
---|
| 186 | 0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0559, 0x06d5, 0x06e5, |
---|
| 187 | 0x06e6, 0x0710, 0x093d, 0x0950, 0x098f, 0x0990, 0x09b2, 0x09dc, 0x09dd, |
---|
| 188 | 0x09f0, 0x09f1, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, |
---|
| 189 | 0x0a39, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0abd, 0x0ad0, 0x0ae0, 0x0b0f, |
---|
| 190 | 0x0b10, 0x0b32, 0x0b33, 0x0b3d, 0x0b5c, 0x0b5d, 0x0b99, 0x0b9a, 0x0b9c, |
---|
| 191 | 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0c60, 0x0c61, 0x0cde, 0x0ce0, 0x0ce1, |
---|
| 192 | 0x0d60, 0x0d61, 0x0dbd, 0x0e32, 0x0e33, 0x0e81, 0x0e82, 0x0e84, 0x0e87, |
---|
| 193 | 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3, |
---|
| 194 | 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x1029, 0x102a, 0x1248, 0x1258, |
---|
| 195 | 0x1288, 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x207f, |
---|
| 196 | 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3005, 0x3006, 0x309d, |
---|
| 197 | 0x309e, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74, 0xfffe |
---|
| 198 | }; |
---|
| 199 | |
---|
| 200 | #define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) |
---|
| 201 | |
---|
| 202 | /* |
---|
| 203 | * Unicode: decimal digit characters |
---|
| 204 | */ |
---|
| 205 | |
---|
/*
 * digitRangeTable -- {start, end} code-point runs of Unicode decimal digit
 * characters; the first entry is ASCII '0'-'9' (0x0030-0x0039). Most runs
 * span ten code points, but {0x0be7, 0x0bef} and {0x1369, 0x1371} span
 * nine.
 *
 * NUM_DIGIT_RANGE is the number of entries, computed with sizeof.
 *
 * Generated data: regenerate instead of editing by hand.
 */
| 206 | static const crange digitRangeTable[] = { |
---|
| 207 | {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x0966, 0x096f}, |
---|
| 208 | {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef}, {0x0b66, 0x0b6f}, |
---|
| 209 | {0x0be7, 0x0bef}, {0x0c66, 0x0c6f}, {0x0ce6, 0x0cef}, {0x0d66, 0x0d6f}, |
---|
| 210 | {0x0e50, 0x0e59}, {0x0ed0, 0x0ed9}, {0x0f20, 0x0f29}, {0x1040, 0x1049}, |
---|
| 211 | {0x1369, 0x1371}, {0x17e0, 0x17e9}, {0x1810, 0x1819}, {0xff10, 0xff19} |
---|
| 212 | }; |
---|
| 213 | |
---|
| 214 | #define NUM_DIGIT_RANGE (sizeof(digitRangeTable)/sizeof(crange)) |
---|
| 215 | |
---|
| 216 | /* |
---|
| 217 | * There are no singleton digit characters, so no digitCharTable is defined. |
---|
| 218 | */ |
---|
| 219 | |
---|
| 220 | /* |
---|
| 221 | * Unicode: punctuation characters. |
---|
| 222 | */ |
---|
| 223 | |
---|
/*
 * punctRangeTable -- {start, end} code-point runs of Unicode punctuation
 * characters. The ASCII runs are 0x21-0x23, 0x25-0x2a, 0x2c-0x2f and
 * 0x5b-0x5d; note that '$' (0x24) and '+' (0x2b) fall in the gaps and are
 * not listed in punctCharTable either. Punctuation outside these runs is
 * enumerated individually in punctCharTable.
 *
 * NUM_PUNCT_RANGE is the number of entries, computed with sizeof.
 *
 * Generated data: regenerate instead of editing by hand.
 */
| 224 | static const crange punctRangeTable[] = { |
---|
| 225 | {0x0021, 0x0023}, {0x0025, 0x002a}, {0x002c, 0x002f}, {0x005b, 0x005d}, |
---|
| 226 | {0x055a, 0x055f}, {0x066a, 0x066d}, {0x0700, 0x070d}, {0x0f04, 0x0f12}, |
---|
| 227 | {0x0f3a, 0x0f3d}, {0x104a, 0x104f}, {0x1361, 0x1368}, {0x16eb, 0x16ed}, |
---|
| 228 | {0x17d4, 0x17da}, {0x1800, 0x180a}, {0x2010, 0x2027}, {0x2030, 0x2043}, |
---|
| 229 | {0x2048, 0x204d}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f}, |
---|
| 230 | {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03}, |
---|
| 231 | {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d}, {0xff61, 0xff65} |
---|
| 232 | }; |
---|
| 233 | |
---|
| 234 | #define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange)) |
---|
| 235 | |
---|
/*
 * punctCharTable -- individual Unicode punctuation code points that are not
 * covered by a punctRangeTable entry. The first seven entries are ASCII:
 * ':' ';' '?' '@' '_' '{' '}'.
 *
 * NUM_PUNCT_CHAR is the number of entries, computed with sizeof.
 *
 * Generated data: regenerate instead of editing by hand.
 */
| 236 | static const chr punctCharTable[] = { |
---|
| 237 | 0x003a, 0x003b, 0x003f, 0x0040, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00ab, |
---|
| 238 | 0x00ad, 0x00b7, 0x00bb, 0x00bf, 0x037e, 0x0387, 0x0589, 0x058a, 0x05be, |
---|
| 239 | 0x05c0, 0x05c3, 0x05f3, 0x05f4, 0x060c, 0x061b, 0x061f, 0x06d4, 0x0964, |
---|
| 240 | 0x0965, 0x0970, 0x0df4, 0x0e4f, 0x0e5a, 0x0e5b, 0x0f85, 0x10fb, 0x166d, |
---|
| 241 | 0x166e, 0x169b, 0x169c, 0x17dc, 0x2045, 0x2046, 0x207d, 0x207e, 0x208d, |
---|
| 242 | 0x208e, 0x2329, 0x232a, 0x3030, 0x30fb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68, |
---|
| 243 | 0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d |
---|
| 244 | }; |
---|
| 245 | |
---|
| 246 | #define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr)) |
---|
| 247 | |
---|
| 248 | /* |
---|
| 249 | * Unicode: white space characters. |
---|
| 250 | */ |
---|
| 251 | |
---|
/*
 * spaceRangeTable -- {start, end} code-point runs of Unicode white space:
 * 0x0009-0x000d (ASCII tab through carriage return) and 0x2000-0x200b.
 * Isolated white-space code points are in spaceCharTable.
 *
 * NUM_SPACE_RANGE is the number of entries, computed with sizeof.
 *
 * Generated data: regenerate instead of editing by hand.
 */
| 252 | static const crange spaceRangeTable[] = { |
---|
| 253 | {0x0009, 0x000d}, {0x2000, 0x200b} |
---|
| 254 | }; |
---|
| 255 | |
---|
| 256 | #define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange)) |
---|
| 257 | |
---|
/*
 * spaceCharTable -- individual Unicode white-space code points that are not
 * covered by a spaceRangeTable entry, starting with the ASCII space
 * (0x0020).
 *
 * NUM_SPACE_CHAR is the number of entries, computed with sizeof.
 *
 * Generated data: regenerate instead of editing by hand.
 */
| 258 | static const chr spaceCharTable[] = { |
---|
| 259 | 0x0020, 0x00a0, 0x1680, 0x2028, 0x2029, 0x202f, 0x3000 |
---|
| 260 | }; |
---|
| 261 | |
---|
| 262 | #define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr)) |
---|
| 263 | |
---|
| 264 | /* |
---|
| 265 | * Unicode: lowercase characters |
---|
| 266 | */ |
---|
| 267 | |
---|
/*
 * lowerRangeTable -- {start, end} code-point runs of Unicode lowercase
 * characters; the first entry is ASCII 'a'-'z' (0x0061-0x007a). Lowercase
 * code points outside these runs are enumerated individually in
 * lowerCharTable.
 *
 * NUM_LOWER_RANGE is the number of entries, computed with sizeof.
 *
 * Generated data: regenerate instead of editing by hand.
 */
| 268 | static const crange lowerRangeTable[] = { |
---|
| 269 | {0x0061, 0x007a}, {0x00df, 0x00f6}, {0x00f8, 0x00ff}, {0x017e, 0x0180}, |
---|
| 270 | {0x0199, 0x019b}, {0x01bd, 0x01bf}, {0x0250, 0x02ad}, {0x03ac, 0x03ce}, |
---|
| 271 | {0x03d5, 0x03d7}, {0x03ef, 0x03f3}, {0x0430, 0x045f}, {0x0561, 0x0587}, |
---|
| 272 | {0x1e95, 0x1e9b}, {0x1f00, 0x1f07}, {0x1f10, 0x1f15}, {0x1f20, 0x1f27}, |
---|
| 273 | {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, {0x1f50, 0x1f57}, {0x1f60, 0x1f67}, |
---|
| 274 | {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7}, |
---|
| 275 | {0x1fb0, 0x1fb4}, {0x1fc2, 0x1fc4}, {0x1fd0, 0x1fd3}, {0x1fe0, 0x1fe7}, |
---|
| 276 | {0x1ff2, 0x1ff4}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xff41, 0xff5a} |
---|
| 277 | }; |
---|
| 278 | |
---|
| 279 | #define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange)) |
---|
| 280 | |
---|
/*
 * lowerCharTable -- individual Unicode lowercase code points that are not
 * covered by a lowerRangeTable entry. Much of the table is every other code
 * point (0x0101, 0x0103, 0x0105, ...); the code points in between appear in
 * upperCharTable (0x0100, 0x0102, 0x0104, ...).
 *
 * NUM_LOWER_CHAR is the number of entries, computed with sizeof.
 *
 * Generated data: regenerate instead of editing by hand.
 */
| 281 | static const chr lowerCharTable[] = { |
---|
| 282 | 0x00aa, 0x00b5, 0x00ba, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b, |
---|
| 283 | 0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d, |
---|
| 284 | 0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f, |
---|
| 285 | 0x0131, 0x0133, 0x0135, 0x0137, 0x0138, 0x013a, 0x013c, 0x013e, 0x0140, |
---|
| 286 | 0x0142, 0x0144, 0x0146, 0x0148, 0x0149, 0x014b, 0x014d, 0x014f, 0x0151, |
---|
| 287 | 0x0153, 0x0155, 0x0157, 0x0159, 0x015b, 0x015d, 0x015f, 0x0161, 0x0163, |
---|
| 288 | 0x0165, 0x0167, 0x0169, 0x016b, 0x016d, 0x016f, 0x0171, 0x0173, 0x0175, |
---|
| 289 | 0x0177, 0x017a, 0x017c, 0x0183, 0x0185, 0x0188, 0x018c, 0x018d, 0x0192, |
---|
| 290 | 0x0195, 0x019e, 0x01a1, 0x01a3, 0x01a5, 0x01a8, 0x01aa, 0x01ab, 0x01ad, |
---|
| 291 | 0x01b0, 0x01b4, 0x01b6, 0x01b9, 0x01ba, 0x01c6, 0x01c9, 0x01cc, 0x01ce, |
---|
| 292 | 0x01d0, 0x01d2, 0x01d4, 0x01d6, 0x01d8, 0x01da, 0x01dc, 0x01dd, 0x01df, |
---|
| 293 | 0x01e1, 0x01e3, 0x01e5, 0x01e7, 0x01e9, 0x01eb, 0x01ed, 0x01ef, 0x01f0, |
---|
| 294 | 0x01f3, 0x01f5, 0x01f9, 0x01fb, 0x01fd, 0x01ff, 0x0201, 0x0203, 0x0205, |
---|
| 295 | 0x0207, 0x0209, 0x020b, 0x020d, 0x020f, 0x0211, 0x0213, 0x0215, 0x0217, |
---|
| 296 | 0x0219, 0x021b, 0x021d, 0x021f, 0x0223, 0x0225, 0x0227, 0x0229, 0x022b, |
---|
| 297 | 0x022d, 0x022f, 0x0231, 0x0233, 0x0390, 0x03d0, 0x03d1, 0x03db, 0x03dd, |
---|
| 298 | 0x03df, 0x03e1, 0x03e3, 0x03e5, 0x03e7, 0x03e9, 0x03eb, 0x03ed, 0x03f5, |
---|
| 299 | 0x0461, 0x0463, 0x0465, 0x0467, 0x0469, 0x046b, 0x046d, 0x046f, 0x0471, |
---|
| 300 | 0x0473, 0x0475, 0x0477, 0x0479, 0x047b, 0x047d, 0x047f, 0x0481, 0x048d, |
---|
| 301 | 0x048f, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499, 0x049b, 0x049d, 0x049f, |
---|
| 302 | 0x04a1, 0x04a3, 0x04a5, 0x04a7, 0x04a9, 0x04ab, 0x04ad, 0x04af, 0x04b1, |
---|
| 303 | 0x04b3, 0x04b5, 0x04b7, 0x04b9, 0x04bb, 0x04bd, 0x04bf, 0x04c2, 0x04c4, |
---|
| 304 | 0x04c8, 0x04cc, 0x04d1, 0x04d3, 0x04d5, 0x04d7, 0x04d9, 0x04db, 0x04dd, |
---|
| 305 | 0x04df, 0x04e1, 0x04e3, 0x04e5, 0x04e7, 0x04e9, 0x04eb, 0x04ed, 0x04ef, |
---|
| 306 | 0x04f1, 0x04f3, 0x04f5, 0x04f9, 0x1e01, 0x1e03, 0x1e05, 0x1e07, 0x1e09, |
---|
| 307 | 0x1e0b, 0x1e0d, 0x1e0f, 0x1e11, 0x1e13, 0x1e15, 0x1e17, 0x1e19, 0x1e1b, |
---|
| 308 | 0x1e1d, 0x1e1f, 0x1e21, 0x1e23, 0x1e25, 0x1e27, 0x1e29, 0x1e2b, 0x1e2d, |
---|
| 309 | 0x1e2f, 0x1e31, 0x1e33, 0x1e35, 0x1e37, 0x1e39, 0x1e3b, 0x1e3d, 0x1e3f, |
---|
| 310 | 0x1e41, 0x1e43, 0x1e45, 0x1e47, 0x1e49, 0x1e4b, 0x1e4d, 0x1e4f, 0x1e51, |
---|
| 311 | 0x1e53, 0x1e55, 0x1e57, 0x1e59, 0x1e5b, 0x1e5d, 0x1e5f, 0x1e61, 0x1e63, |
---|
| 312 | 0x1e65, 0x1e67, 0x1e69, 0x1e6b, 0x1e6d, 0x1e6f, 0x1e71, 0x1e73, 0x1e75, |
---|
| 313 | 0x1e77, 0x1e79, 0x1e7b, 0x1e7d, 0x1e7f, 0x1e81, 0x1e83, 0x1e85, 0x1e87, |
---|
| 314 | 0x1e89, 0x1e8b, 0x1e8d, 0x1e8f, 0x1e91, 0x1e93, 0x1ea1, 0x1ea3, 0x1ea5, |
---|
| 315 | 0x1ea7, 0x1ea9, 0x1eab, 0x1ead, 0x1eaf, 0x1eb1, 0x1eb3, 0x1eb5, 0x1eb7, |
---|
| 316 | 0x1eb9, 0x1ebb, 0x1ebd, 0x1ebf, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ec7, 0x1ec9, |
---|
| 317 | 0x1ecb, 0x1ecd, 0x1ecf, 0x1ed1, 0x1ed3, 0x1ed5, 0x1ed7, 0x1ed9, 0x1edb, |
---|
| 318 | 0x1edd, 0x1edf, 0x1ee1, 0x1ee3, 0x1ee5, 0x1ee7, 0x1ee9, 0x1eeb, 0x1eed, |
---|
| 319 | 0x1eef, 0x1ef1, 0x1ef3, 0x1ef5, 0x1ef7, 0x1ef9, 0x1fb6, 0x1fb7, 0x1fbe, |
---|
| 320 | 0x1fc6, 0x1fc7, 0x1fd6, 0x1fd7, 0x1ff6, 0x1ff7, 0x207f, 0x210a, 0x210e, |
---|
| 321 | 0x210f, 0x2113, 0x212f, 0x2134, 0x2139 |
---|
| 322 | }; |
---|
| 323 | |
---|
| 324 | #define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr)) |
---|
| 325 | |
---|
| 326 | /* |
---|
| 327 | * Unicode: uppercase characters. |
---|
| 328 | */ |
---|
| 329 | |
---|
/*
 * upperRangeTable -- {start, end} code-point runs of Unicode uppercase
 * characters; the first entry is ASCII 'A'-'Z' (0x0041-0x005a). Uppercase
 * code points outside these runs are enumerated individually in
 * upperCharTable.
 *
 * NUM_UPPER_RANGE is the number of entries, computed with sizeof.
 *
 * Generated data: regenerate instead of editing by hand.
 */
| 330 | static const crange upperRangeTable[] = { |
---|
| 331 | {0x0041, 0x005a}, {0x00c0, 0x00d6}, {0x00d8, 0x00de}, {0x0189, 0x018b}, |
---|
| 332 | {0x018e, 0x0191}, {0x0196, 0x0198}, {0x01b1, 0x01b3}, {0x01f6, 0x01f8}, |
---|
| 333 | {0x0388, 0x038a}, {0x0391, 0x03a1}, {0x03a3, 0x03ab}, {0x03d2, 0x03d4}, |
---|
| 334 | {0x0400, 0x042f}, {0x0531, 0x0556}, {0x10a0, 0x10c5}, {0x1f08, 0x1f0f}, |
---|
| 335 | {0x1f18, 0x1f1d}, {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d}, |
---|
| 336 | {0x1f68, 0x1f6f}, {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb}, |
---|
| 337 | {0x1fe8, 0x1fec}, {0x1ff8, 0x1ffb}, {0x210b, 0x210d}, {0x2110, 0x2112}, |
---|
| 338 | {0x2119, 0x211d}, {0x212a, 0x212d}, {0xff21, 0xff3a} |
---|
| 339 | }; |
---|
| 340 | |
---|
| 341 | #define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange)) |
---|
| 342 | |
---|
/*
 * upperCharTable -- individual Unicode uppercase code points that are not
 * covered by an upperRangeTable entry. Much of the table is every other
 * code point (0x0100, 0x0102, 0x0104, ...); the code points in between
 * appear in lowerCharTable (0x0101, 0x0103, 0x0105, ...).
 *
 * NUM_UPPER_CHAR is the number of entries, computed with sizeof.
 *
 * Generated data: regenerate instead of editing by hand.
 */
| 343 | static const chr upperCharTable[] = { |
---|
| 344 | 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110, |
---|
| 345 | 0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122, |
---|
| 346 | 0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134, |
---|
| 347 | 0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, 0x0143, 0x0145, 0x0147, |
---|
| 348 | 0x014a, 0x014c, 0x014e, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, 0x015a, |
---|
| 349 | 0x015c, 0x015e, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168, 0x016a, 0x016c, |
---|
| 350 | 0x016e, 0x0170, 0x0172, 0x0174, 0x0176, 0x0178, 0x0179, 0x017b, 0x017d, |
---|
| 351 | 0x0181, 0x0182, 0x0184, 0x0186, 0x0187, 0x0193, 0x0194, 0x019c, 0x019d, |
---|
| 352 | 0x019f, 0x01a0, 0x01a2, 0x01a4, 0x01a6, 0x01a7, 0x01a9, 0x01ac, 0x01ae, |
---|
| 353 | 0x01af, 0x01b5, 0x01b7, 0x01b8, 0x01bc, 0x01c4, 0x01c7, 0x01ca, 0x01cd, |
---|
| 354 | 0x01cf, 0x01d1, 0x01d3, 0x01d5, 0x01d7, 0x01d9, 0x01db, 0x01de, 0x01e0, |
---|
| 355 | 0x01e2, 0x01e4, 0x01e6, 0x01e8, 0x01ea, 0x01ec, 0x01ee, 0x01f1, 0x01f4, |
---|
| 356 | 0x01fa, 0x01fc, 0x01fe, 0x0200, 0x0202, 0x0204, 0x0206, 0x0208, 0x020a, |
---|
| 357 | 0x020c, 0x020e, 0x0210, 0x0212, 0x0214, 0x0216, 0x0218, 0x021a, 0x021c, |
---|
| 358 | 0x021e, 0x0222, 0x0224, 0x0226, 0x0228, 0x022a, 0x022c, 0x022e, 0x0230, |
---|
| 359 | 0x0232, 0x0386, 0x038c, 0x038e, 0x038f, 0x03da, 0x03dc, 0x03de, 0x03e0, |
---|
| 360 | 0x03e2, 0x03e4, 0x03e6, 0x03e8, 0x03ea, 0x03ec, 0x03ee, 0x03f4, 0x0460, |
---|
| 361 | 0x0462, 0x0464, 0x0466, 0x0468, 0x046a, 0x046c, 0x046e, 0x0470, 0x0472, |
---|
| 362 | 0x0474, 0x0476, 0x0478, 0x047a, 0x047c, 0x047e, 0x0480, 0x048c, 0x048e, |
---|
| 363 | 0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049a, 0x049c, 0x049e, 0x04a0, |
---|
| 364 | 0x04a2, 0x04a4, 0x04a6, 0x04a8, 0x04aa, 0x04ac, 0x04ae, 0x04b0, 0x04b2, |
---|
| 365 | 0x04b4, 0x04b6, 0x04b8, 0x04ba, 0x04bc, 0x04be, 0x04c0, 0x04c1, 0x04c3, |
---|
| 366 | 0x04c7, 0x04cb, 0x04d0, 0x04d2, 0x04d4, 0x04d6, 0x04d8, 0x04da, 0x04dc, |
---|
| 367 | 0x04de, 0x04e0, 0x04e2, 0x04e4, 0x04e6, 0x04e8, 0x04ea, 0x04ec, 0x04ee, |
---|
| 368 | 0x04f0, 0x04f2, 0x04f4, 0x04f8, 0x1e00, 0x1e02, 0x1e04, 0x1e06, 0x1e08, |
---|
| 369 | 0x1e0a, 0x1e0c, 0x1e0e, 0x1e10, 0x1e12, 0x1e14, 0x1e16, 0x1e18, 0x1e1a, |
---|
| 370 | 0x1e1c, 0x1e1e, 0x1e20, 0x1e22, 0x1e24, 0x1e26, 0x1e28, 0x1e2a, 0x1e2c, |
---|
| 371 | 0x1e2e, 0x1e30, 0x1e32, 0x1e34, 0x1e36, 0x1e38, 0x1e3a, 0x1e3c, 0x1e3e, |
---|
| 372 | 0x1e40, 0x1e42, 0x1e44, 0x1e46, 0x1e48, 0x1e4a, 0x1e4c, 0x1e4e, 0x1e50, |
---|
| 373 | 0x1e52, 0x1e54, 0x1e56, 0x1e58, 0x1e5a, 0x1e5c, 0x1e5e, 0x1e60, 0x1e62, |
---|
| 374 | 0x1e64, 0x1e66, 0x1e68, 0x1e6a, 0x1e6c, 0x1e6e, 0x1e70, 0x1e72, 0x1e74, |
---|
| 375 | 0x1e76, 0x1e78, 0x1e7a, 0x1e7c, 0x1e7e, 0x1e80, 0x1e82, 0x1e84, 0x1e86, |
---|
| 376 | 0x1e88, 0x1e8a, 0x1e8c, 0x1e8e, 0x1e90, 0x1e92, 0x1e94, 0x1ea0, 0x1ea2, |
---|
| 377 | 0x1ea4, 0x1ea6, 0x1ea8, 0x1eaa, 0x1eac, 0x1eae, 0x1eb0, 0x1eb2, 0x1eb4, |
---|
| 378 | 0x1eb6, 0x1eb8, 0x1eba, 0x1ebc, 0x1ebe, 0x1ec0, 0x1ec2, 0x1ec4, 0x1ec6, |
---|
| 379 | 0x1ec8, 0x1eca, 0x1ecc, 0x1ece, 0x1ed0, 0x1ed2, 0x1ed4, 0x1ed6, 0x1ed8, |
---|
| 380 | 0x1eda, 0x1edc, 0x1ede, 0x1ee0, 0x1ee2, 0x1ee4, 0x1ee6, 0x1ee8, 0x1eea, |
---|
| 381 | 0x1eec, 0x1eee, 0x1ef0, 0x1ef2, 0x1ef4, 0x1ef6, 0x1ef8, 0x1f59, 0x1f5b, |
---|
| 382 | 0x1f5d, 0x1f5f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x2130, |
---|
| 383 | 0x2131, 0x2133 |
---|
| 384 | }; |
---|
| 385 | |
---|
| 386 | #define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) |
---|
| 387 | |
---|
| 388 | /* |
---|
| 389 | * Unicode: unicode print characters excluding space. |
---|
| 390 | */ |
---|
| 391 | |
---|
/*
 * graphRangeTable -- {start, end} code-point runs of the Unicode "graph"
 * class (described above as print characters excluding space). Code points
 * outside these runs are enumerated individually in graphCharTable.
 *
 * NUM_GRAPH_RANGE is the number of entries, computed with sizeof.
 *
 * NOTE(review): two patterns in this data look like generator artifacts and
 * should be confirmed against tools/uniClass.tcl before relying on them:
 *   - Many neighbouring runs are separated by exactly one code point whose
 *     low byte is 0x20 (e.g. {0x00a0, 0x011f}, {0x0121, 0x021f}; and every
 *     0xNN1f / 0xNN21 boundary through the 0x3400-0x9fa5 and 0xac00-0xd7a3
 *     blocks), although alphaRangeTable treats the same spans as unbroken
 *     (e.g. {0x00f8, 0x021f}, {0x4e00, 0x9fa5}).
 *   - Some code points listed in the white-space tables are included here:
 *     0x00a0 and 0x3000 start runs, and {0x2000, 0x200b} appears verbatim
 *     in both this table and spaceRangeTable.
 *
 * Generated data: regenerate instead of editing by hand.
 */
| 392 | static const crange graphRangeTable[] = { |
---|
| 393 | {0x0021, 0x007e}, {0x00a0, 0x011f}, {0x0121, 0x021f}, {0x0222, 0x0233}, |
---|
| 394 | {0x0250, 0x02ad}, {0x02b0, 0x02ee}, {0x0300, 0x031f}, {0x0321, 0x034e}, |
---|
| 395 | {0x0360, 0x0362}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x03ce}, |
---|
| 396 | {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x041f}, {0x0421, 0x0486}, |
---|
| 397 | {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0559, 0x055f}, |
---|
| 398 | {0x0561, 0x0587}, {0x0591, 0x05a1}, {0x05a3, 0x05b9}, {0x05bb, 0x05c4}, |
---|
| 399 | {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0621, 0x063a}, {0x0640, 0x0655}, |
---|
| 400 | {0x0660, 0x066d}, {0x0670, 0x06ed}, {0x06f0, 0x06fe}, {0x0700, 0x070d}, |
---|
| 401 | {0x0710, 0x071f}, {0x0721, 0x072c}, {0x0730, 0x074a}, {0x0780, 0x07b0}, |
---|
| 402 | {0x0901, 0x0903}, {0x0905, 0x091f}, {0x0921, 0x0939}, {0x093c, 0x094d}, |
---|
| 403 | {0x0950, 0x0954}, {0x0958, 0x0970}, {0x0981, 0x0983}, {0x0985, 0x098c}, |
---|
| 404 | {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09be, 0x09c4}, |
---|
| 405 | {0x09cb, 0x09cd}, {0x09df, 0x09e3}, {0x09e6, 0x09fa}, {0x0a05, 0x0a0a}, |
---|
| 406 | {0x0a13, 0x0a1f}, {0x0a21, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42}, |
---|
| 407 | {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c}, {0x0a66, 0x0a74}, {0x0a81, 0x0a83}, |
---|
| 408 | {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, |
---|
| 409 | {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, |
---|
| 410 | {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b1f}, |
---|
| 411 | {0x0b21, 0x0b28}, {0x0b2a, 0x0b30}, {0x0b36, 0x0b39}, {0x0b3c, 0x0b43}, |
---|
| 412 | {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b61}, {0x0b66, 0x0b70}, {0x0b85, 0x0b8a}, |
---|
| 413 | {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, |
---|
| 414 | {0x0bb7, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, |
---|
| 415 | {0x0be7, 0x0bf2}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, |
---|
| 416 | {0x0c12, 0x0c1f}, {0x0c21, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39}, |
---|
| 417 | {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c66, 0x0c6f}, |
---|
| 418 | {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3}, |
---|
| 419 | {0x0cb5, 0x0cb9}, {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd}, |
---|
| 420 | {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d1f}, |
---|
| 421 | {0x0d21, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d3e, 0x0d43}, {0x0d46, 0x0d48}, |
---|
| 422 | {0x0d4a, 0x0d4d}, {0x0d66, 0x0d6f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, |
---|
| 423 | {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf}, |
---|
| 424 | {0x0df2, 0x0df4}, {0x0e01, 0x0e1f}, {0x0e21, 0x0e3a}, {0x0e3f, 0x0e5b}, |
---|
| 425 | {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, |
---|
| 426 | {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9}, |
---|
| 427 | {0x0f00, 0x0f1f}, {0x0f21, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f71, 0x0f8b}, |
---|
| 428 | {0x0f90, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x1000, 0x101f}, |
---|
| 429 | {0x1023, 0x1027}, {0x102c, 0x1032}, {0x1036, 0x1039}, {0x1040, 0x1059}, |
---|
| 430 | {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x111f}, {0x1121, 0x1159}, |
---|
| 431 | {0x115f, 0x11a2}, {0x11a8, 0x11f9}, {0x1200, 0x1206}, {0x1208, 0x121f}, |
---|
| 432 | {0x1221, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d}, |
---|
| 433 | {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b2, 0x12b5}, |
---|
| 434 | {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, {0x12d0, 0x12d6}, |
---|
| 435 | {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, {0x1318, 0x131e}, |
---|
| 436 | {0x1321, 0x1346}, {0x1348, 0x135a}, {0x1361, 0x137c}, {0x13a0, 0x13f4}, |
---|
| 437 | {0x1401, 0x141f}, {0x1421, 0x151f}, {0x1521, 0x161f}, {0x1621, 0x1676}, |
---|
| 438 | {0x1680, 0x169c}, {0x16a0, 0x16f0}, {0x1780, 0x17dc}, {0x17e0, 0x17e9}, |
---|
| 439 | {0x1800, 0x180a}, {0x1810, 0x1819}, {0x1821, 0x1877}, {0x1880, 0x18a9}, |
---|
| 440 | {0x1e00, 0x1e1f}, {0x1e21, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, |
---|
| 441 | {0x1f18, 0x1f1d}, {0x1f21, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, |
---|
| 442 | {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3}, |
---|
| 443 | {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe}, |
---|
| 444 | {0x2000, 0x200b}, {0x2010, 0x201f}, {0x2021, 0x2029}, {0x202f, 0x2046}, |
---|
| 445 | {0x2048, 0x204d}, {0x2074, 0x208e}, {0x20a0, 0x20af}, {0x20d0, 0x20e3}, |
---|
| 446 | {0x2100, 0x211f}, {0x2121, 0x213a}, {0x2153, 0x2183}, {0x2190, 0x21f3}, |
---|
| 447 | {0x2200, 0x221f}, {0x2221, 0x22f1}, {0x2300, 0x231f}, {0x2321, 0x237b}, |
---|
| 448 | {0x237d, 0x239a}, {0x2400, 0x241f}, {0x2421, 0x2426}, {0x2440, 0x244a}, |
---|
| 449 | {0x2460, 0x24ea}, {0x2500, 0x251f}, {0x2521, 0x2595}, {0x25a0, 0x25f7}, |
---|
| 450 | {0x2600, 0x2613}, {0x2619, 0x261f}, {0x2621, 0x2671}, {0x2701, 0x2704}, |
---|
| 451 | {0x2706, 0x2709}, {0x270c, 0x271f}, {0x2721, 0x2727}, {0x2729, 0x274b}, |
---|
| 452 | {0x274f, 0x2752}, {0x2758, 0x275e}, {0x2761, 0x2767}, {0x2776, 0x2794}, |
---|
| 453 | {0x2798, 0x27af}, {0x27b1, 0x27be}, {0x2800, 0x281f}, {0x2821, 0x28ff}, |
---|
| 454 | {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2f1f}, {0x2f21, 0x2fd5}, |
---|
| 455 | {0x2ff0, 0x2ffb}, {0x3000, 0x301f}, {0x3021, 0x303a}, {0x3041, 0x3094}, |
---|
| 456 | {0x3099, 0x309e}, {0x30a1, 0x30fe}, {0x3105, 0x311f}, {0x3121, 0x312c}, |
---|
| 457 | {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x3200, 0x321c}, {0x3221, 0x3243}, |
---|
| 458 | {0x3260, 0x327b}, {0x327f, 0x32b0}, {0x32c0, 0x32cb}, {0x32d0, 0x32fe}, |
---|
| 459 | {0x3300, 0x331f}, {0x3321, 0x3376}, {0x337b, 0x33dd}, {0x33e0, 0x33fe}, |
---|
| 460 | {0x3400, 0x341f}, {0x3421, 0x351f}, {0x3521, 0x361f}, {0x3621, 0x371f}, |
---|
| 461 | {0x3721, 0x381f}, {0x3821, 0x391f}, {0x3921, 0x3a1f}, {0x3a21, 0x3b1f}, |
---|
| 462 | {0x3b21, 0x3c1f}, {0x3c21, 0x3d1f}, {0x3d21, 0x3e1f}, {0x3e21, 0x3f1f}, |
---|
| 463 | {0x3f21, 0x401f}, {0x4021, 0x411f}, {0x4121, 0x421f}, {0x4221, 0x431f}, |
---|
| 464 | {0x4321, 0x441f}, {0x4421, 0x451f}, {0x4521, 0x461f}, {0x4621, 0x471f}, |
---|
| 465 | {0x4721, 0x481f}, {0x4821, 0x491f}, {0x4921, 0x4a1f}, {0x4a21, 0x4b1f}, |
---|
| 466 | {0x4b21, 0x4c1f}, {0x4c21, 0x4d1f}, {0x4d21, 0x4db5}, {0x4e00, 0x4e1f}, |
---|
| 467 | {0x4e21, 0x4f1f}, {0x4f21, 0x501f}, {0x5021, 0x511f}, {0x5121, 0x521f}, |
---|
| 468 | {0x5221, 0x531f}, {0x5321, 0x541f}, {0x5421, 0x551f}, {0x5521, 0x561f}, |
---|
| 469 | {0x5621, 0x571f}, {0x5721, 0x581f}, {0x5821, 0x591f}, {0x5921, 0x5a1f}, |
---|
| 470 | {0x5a21, 0x5b1f}, {0x5b21, 0x5c1f}, {0x5c21, 0x5d1f}, {0x5d21, 0x5e1f}, |
---|
| 471 | {0x5e21, 0x5f1f}, {0x5f21, 0x601f}, {0x6021, 0x611f}, {0x6121, 0x621f}, |
---|
| 472 | {0x6221, 0x631f}, {0x6321, 0x641f}, {0x6421, 0x651f}, {0x6521, 0x661f}, |
---|
| 473 | {0x6621, 0x671f}, {0x6721, 0x681f}, {0x6821, 0x691f}, {0x6921, 0x6a1f}, |
---|
| 474 | {0x6a21, 0x6b1f}, {0x6b21, 0x6c1f}, {0x6c21, 0x6d1f}, {0x6d21, 0x6e1f}, |
---|
| 475 | {0x6e21, 0x6f1f}, {0x6f21, 0x701f}, {0x7021, 0x711f}, {0x7121, 0x721f}, |
---|
| 476 | {0x7221, 0x731f}, {0x7321, 0x741f}, {0x7421, 0x751f}, {0x7521, 0x761f}, |
---|
| 477 | {0x7621, 0x771f}, {0x7721, 0x781f}, {0x7821, 0x791f}, {0x7921, 0x7a1f}, |
---|
| 478 | {0x7a21, 0x7b1f}, {0x7b21, 0x7c1f}, {0x7c21, 0x7d1f}, {0x7d21, 0x7e1f}, |
---|
| 479 | {0x7e21, 0x7f1f}, {0x7f21, 0x801f}, {0x8021, 0x811f}, {0x8121, 0x821f}, |
---|
| 480 | {0x8221, 0x831f}, {0x8321, 0x841f}, {0x8421, 0x851f}, {0x8521, 0x861f}, |
---|
| 481 | {0x8621, 0x871f}, {0x8721, 0x881f}, {0x8821, 0x891f}, {0x8921, 0x8a1f}, |
---|
| 482 | {0x8a21, 0x8b1f}, {0x8b21, 0x8c1f}, {0x8c21, 0x8d1f}, {0x8d21, 0x8e1f}, |
---|
| 483 | {0x8e21, 0x8f1f}, {0x8f21, 0x901f}, {0x9021, 0x911f}, {0x9121, 0x921f}, |
---|
| 484 | {0x9221, 0x931f}, {0x9321, 0x941f}, {0x9421, 0x951f}, {0x9521, 0x961f}, |
---|
| 485 | {0x9621, 0x971f}, {0x9721, 0x981f}, {0x9821, 0x991f}, {0x9921, 0x9a1f}, |
---|
| 486 | {0x9a21, 0x9b1f}, {0x9b21, 0x9c1f}, {0x9c21, 0x9d1f}, {0x9d21, 0x9e1f}, |
---|
| 487 | {0x9e21, 0x9f1f}, {0x9f21, 0x9fa5}, {0xa000, 0xa01f}, {0xa021, 0xa11f}, |
---|
| 488 | {0xa121, 0xa21f}, {0xa221, 0xa31f}, {0xa321, 0xa41f}, {0xa421, 0xa48c}, |
---|
| 489 | {0xa490, 0xa4a1}, {0xa4a4, 0xa4b3}, {0xa4b5, 0xa4c0}, {0xa4c2, 0xa4c4}, |
---|
| 490 | {0xac00, 0xac1f}, {0xac21, 0xad1f}, {0xad21, 0xae1f}, {0xae21, 0xaf1f}, |
---|
| 491 | {0xaf21, 0xb01f}, {0xb021, 0xb11f}, {0xb121, 0xb21f}, {0xb221, 0xb31f}, |
---|
| 492 | {0xb321, 0xb41f}, {0xb421, 0xb51f}, {0xb521, 0xb61f}, {0xb621, 0xb71f}, |
---|
| 493 | {0xb721, 0xb81f}, {0xb821, 0xb91f}, {0xb921, 0xba1f}, {0xba21, 0xbb1f}, |
---|
| 494 | {0xbb21, 0xbc1f}, {0xbc21, 0xbd1f}, {0xbd21, 0xbe1f}, {0xbe21, 0xbf1f}, |
---|
| 495 | {0xbf21, 0xc01f}, {0xc021, 0xc11f}, {0xc121, 0xc21f}, {0xc221, 0xc31f}, |
---|
| 496 | {0xc321, 0xc41f}, {0xc421, 0xc51f}, {0xc521, 0xc61f}, {0xc621, 0xc71f}, |
---|
| 497 | {0xc721, 0xc81f}, {0xc821, 0xc91f}, {0xc921, 0xca1f}, {0xca21, 0xcb1f}, |
---|
| 498 | {0xcb21, 0xcc1f}, {0xcc21, 0xcd1f}, {0xcd21, 0xce1f}, {0xce21, 0xcf1f}, |
---|
| 499 | {0xcf21, 0xd01f}, {0xd021, 0xd11f}, {0xd121, 0xd21f}, {0xd221, 0xd31f}, |
---|
| 500 | {0xd321, 0xd41f}, {0xd421, 0xd51f}, {0xd521, 0xd61f}, {0xd621, 0xd71f}, |
---|
| 501 | {0xd721, 0xd7a3}, {0xf900, 0xf91f}, {0xf921, 0xfa1f}, {0xfa21, 0xfa2d}, |
---|
| 502 | {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1f}, {0xfb21, 0xfb36}, |
---|
| 503 | {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfc1f}, {0xfc21, 0xfd1f}, |
---|
| 504 | {0xfd21, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, |
---|
| 505 | {0xfe21, 0xfe23}, {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe66}, |
---|
| 506 | {0xfe68, 0xfe6b}, {0xfe70, 0xfe72}, {0xfe76, 0xfefc}, {0xff01, 0xff1f}, |
---|
| 507 | {0xff21, 0xff5e}, {0xff61, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf}, |
---|
| 508 | {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee}, |
---|
| 509 | {0xfffc, 0xffff} |
---|
| 510 | }; |
---|
| 511 | |
---|
| 512 | #define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange)) |
---|
| 513 | |
---|
| 514 | static const chr graphCharTable[] = { |
---|
| 515 | 0x0374, 0x0375, 0x037a, 0x037e, 0x038c, 0x0488, 0x0489, 0x04c7, 0x04c8, |
---|
| 516 | 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0589, 0x058a, 0x060c, 0x061b, 0x061f, |
---|
| 517 | 0x098f, 0x0990, 0x09b2, 0x09bc, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd, |
---|
| 518 | 0x0a02, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, 0x0a39, |
---|
| 519 | 0x0a3c, 0x0a47, 0x0a48, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0ad0, 0x0ae0, |
---|
| 520 | 0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56, 0x0b57, 0x0b5c, |
---|
| 521 | 0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3, |
---|
| 522 | 0x0ba4, 0x0bd7, 0x0c55, 0x0c56, 0x0c60, 0x0c61, 0x0c82, 0x0c83, 0x0cd5, |
---|
| 523 | 0x0cd6, 0x0cde, 0x0ce0, 0x0ce1, 0x0d02, 0x0d03, 0x0d57, 0x0d60, 0x0d61, |
---|
| 524 | 0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84, 0x0e87, |
---|
| 525 | 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6, 0x0edc, |
---|
| 526 | 0x0edd, 0x0fcf, 0x1021, 0x1029, 0x102a, 0x10fb, 0x1248, 0x1258, 0x1288, |
---|
| 527 | 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x274d, 0x2756, |
---|
| 528 | 0x303e, 0x303f, 0xa4c6, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74 |
---|
| 529 | }; |
---|
| 530 | |
---|
| 531 | #define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr)) |
---|
| 532 | |
---|
| 533 | /* |
---|
| 534 | * Unicode: unicode print characters including space, i.e. all Letters (class |
---|
| 535 | * L*), Numbers (N*), Punctuation (P*), Symbols (S*) and Spaces (Zs). |
---|
| 536 | */ |
---|
| 537 | |
---|
| 538 | static const crange printRangeTable[] = { |
---|
| 539 | {0x0020, 0x007E}, {0x00A0, 0x01F5}, {0x01FA, 0x0217}, {0x0250, 0x02A8}, |
---|
| 540 | {0x02B0, 0x02DE}, {0x02E0, 0x02E9}, {0x0374, 0x0375}, {0x0384, 0x038A}, |
---|
| 541 | {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3}, |
---|
| 542 | {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0482}, |
---|
| 543 | {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, |
---|
| 544 | {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0559, 0x055F}, |
---|
| 545 | {0x0561, 0x0587}, {0x05D0, 0x05EA}, {0x05F0, 0x05F4}, {0x0621, 0x063A}, |
---|
| 546 | {0x0640, 0x064A}, {0x0660, 0x066D}, {0x0671, 0x06B7}, {0x06BA, 0x06BE}, |
---|
| 547 | {0x06C0, 0x06CE}, {0x06D0, 0x06D5}, {0x06E5, 0x06E6}, {0x06F0, 0x06F9}, |
---|
| 548 | {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0964, 0x0970}, {0x0985, 0x098C}, |
---|
| 549 | {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, |
---|
| 550 | {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09E6, 0x09FA}, {0x0A05, 0x0A0A}, |
---|
| 551 | {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, |
---|
| 552 | {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C}, {0x0A66, 0x0A6F}, |
---|
| 553 | {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, |
---|
| 554 | {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0AE6, 0x0AEF}, |
---|
| 555 | {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, |
---|
| 556 | {0x0B32, 0x0B33}, {0x0B36, 0x0B39}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, |
---|
| 557 | {0x0B66, 0x0B70}, {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, |
---|
| 558 | {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, |
---|
| 559 | {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0BE7, 0x0BF2}, {0x0C05, 0x0C0C}, |
---|
| 560 | {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, |
---|
| 561 | {0x0C60, 0x0C61}, {0x0C66, 0x0C6F}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, |
---|
| 562 | {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CE0, 0x0CE1}, |
---|
| 563 | {0x0CE6, 0x0CEF}, {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, |
---|
| 564 | {0x0D2A, 0x0D39}, {0x0D60, 0x0D61}, {0x0D66, 0x0D6F}, {0x0E3F, 0x0E46}, |
---|
| 565 | {0x0E4F, 0x0E5B}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB}, |
---|
| 566 | {0x0EAD, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EC0, 0x0EC4}, {0x0ED0, 0x0ED9}, |
---|
| 567 | {0x0EDC, 0x0EDD}, {0x0F00, 0x0F17}, {0x0F1A, 0x0F34}, {0x0F3A, 0x0F3D}, |
---|
| 568 | {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x0F88, 0x0F8B}, {0x10A0, 0x10C5}, |
---|
| 569 | {0x10D0, 0x10F6}, {0x1100, 0x1159}, {0x115F, 0x11A2}, {0x11A8, 0x11F9}, |
---|
| 570 | {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, |
---|
| 571 | {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, |
---|
| 572 | {0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3}, {0x1FD6, 0x1FDB}, |
---|
| 573 | {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFE}, {0x2000, 0x200B}, |
---|
| 574 | {0x2010, 0x2027}, {0x2030, 0x2046}, {0x2074, 0x208E}, {0x20A0, 0x20AC}, |
---|
| 575 | {0x2100, 0x2138}, {0x2153, 0x2182}, {0x2190, 0x21EA}, {0x2200, 0x22F1}, |
---|
| 576 | {0x2302, 0x237A}, {0x2400, 0x2424}, {0x2440, 0x244A}, {0x2460, 0x24EA}, |
---|
| 577 | {0x2500, 0x2595}, {0x25A0, 0x25EF}, {0x2600, 0x2613}, {0x261A, 0x266F}, |
---|
| 578 | {0x2701, 0x2704}, {0x2706, 0x2709}, {0x270C, 0x2727}, {0x2729, 0x274B}, |
---|
| 579 | {0x274F, 0x2752}, {0x2758, 0x275E}, {0x2761, 0x2767}, {0x2776, 0x2794}, |
---|
| 580 | {0x2798, 0x27AF}, {0x27B1, 0x27BE}, {0x3000, 0x3029}, {0x3030, 0x3037}, |
---|
| 581 | {0x3041, 0x3094}, {0x309B, 0x309E}, {0x30A1, 0x30FE}, {0x3105, 0x312C}, |
---|
| 582 | {0x3131, 0x318E}, {0x3190, 0x319F}, {0x3200, 0x321C}, {0x3220, 0x3243}, |
---|
| 583 | {0x3260, 0x327B}, {0x327F, 0x32B0}, {0x32C0, 0x32CB}, {0x32D0, 0x32FE}, |
---|
| 584 | {0x3300, 0x3376}, {0x337B, 0x33DD}, {0x33E0, 0x33FE}, {0x4E00, 0x9FA5}, |
---|
| 585 | {0xAC00, 0xD7A3}, {0xF900, 0xFA2D}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, |
---|
| 586 | {0xFB1F, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44}, |
---|
| 587 | {0xFB46, 0xFBB1}, {0xFBD3, 0xFD3F}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, |
---|
| 588 | {0xFDF0, 0xFDFB}, {0xFE30, 0xFE44}, {0xFE49, 0xFE52}, {0xFE54, 0xFE66}, |
---|
| 589 | {0xFE68, 0xFE6B}, {0xFE70, 0xFE72}, {0xFE76, 0xFEFC}, {0xFF01, 0xFF5E}, |
---|
| 590 | {0xFF61, 0xFFBE}, {0xFFC2, 0xFFC7}, {0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, |
---|
| 591 | {0xFFDA, 0xFFDC}, {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD} |
---|
| 592 | }; |
---|
| 593 | |
---|
| 594 | #define NUM_PRINT_RANGE (sizeof(printRangeTable)/sizeof(crange)) |
---|
| 595 | |
---|
| 596 | static const chr printCharTable[] = { |
---|
| 597 | 0x037A, 0x037E, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0589, 0x05BE, |
---|
| 598 | 0x05C0, 0x05C3, 0x060C, 0x061B, 0x061F, 0x06E9, 0x093D, 0x0950, 0x09B2, |
---|
| 599 | 0x0A5E, 0x0A8D, 0x0ABD, 0x0AD0, 0x0AE0, 0x0B3D, 0x0B9C, 0x0CDE, 0x0E01, |
---|
| 600 | 0x0E32, 0x0E81, 0x0E84, 0x0E87, 0x0E8A, 0x0E8D, 0x0E94, 0x0EA5, 0x0EA7, |
---|
| 601 | 0x0EBD, 0x0EC6, 0x0F36, 0x0F38, 0x0F85, 0x10FB, 0x1F59, 0x1F5B, 0x1F5D, |
---|
| 602 | 0x2070, 0x2300, 0x274D, 0x2756, 0x303F, 0xFB3E, 0xFE74 |
---|
| 603 | }; |
---|
| 604 | |
---|
| 605 | #define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(chr)) |
---|
| 606 | |
---|
| 607 | /* |
---|
| 608 | * End of auto-generated Unicode character ranges declarations. |
---|
| 609 | */ |
---|
| 610 | |
---|
/* CH expands to NOCELT, which is defined outside this part of the file. */
#define CH NOCELT
---|
| 612 | |
---|
| 613 | /* |
---|
| 614 | - element - map collating-element name to celt |
---|
| 615 | ^ static celt element(struct vars *, const chr *, const chr *); |
---|
| 616 | */ |
---|
| 617 | static celt |
---|
| 618 | element( |
---|
| 619 | struct vars *v, /* context */ |
---|
| 620 | const chr *startp, /* points to start of name */ |
---|
| 621 | const chr *endp) /* points just past end of name */ |
---|
| 622 | { |
---|
| 623 | const struct cname *cn; |
---|
| 624 | size_t len; |
---|
| 625 | Tcl_DString ds; |
---|
| 626 | const char *np; |
---|
| 627 | |
---|
| 628 | /* |
---|
| 629 | * Generic: one-chr names stand for themselves. |
---|
| 630 | */ |
---|
| 631 | |
---|
| 632 | assert(startp < endp); |
---|
| 633 | len = endp - startp; |
---|
| 634 | if (len == 1) { |
---|
| 635 | return *startp; |
---|
| 636 | } |
---|
| 637 | |
---|
| 638 | NOTE(REG_ULOCALE); |
---|
| 639 | |
---|
| 640 | /* |
---|
| 641 | * Search table. |
---|
| 642 | */ |
---|
| 643 | |
---|
| 644 | Tcl_DStringInit(&ds); |
---|
| 645 | np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); |
---|
| 646 | for (cn=cnames; cn->name!=NULL; cn++) { |
---|
| 647 | if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) { |
---|
| 648 | break; /* NOTE BREAK OUT */ |
---|
| 649 | } |
---|
| 650 | } |
---|
| 651 | Tcl_DStringFree(&ds); |
---|
| 652 | if (cn->name != NULL) { |
---|
| 653 | return CHR(cn->code); |
---|
| 654 | } |
---|
| 655 | |
---|
| 656 | /* |
---|
| 657 | * Couldn't find it. |
---|
| 658 | */ |
---|
| 659 | |
---|
| 660 | ERR(REG_ECOLLATE); |
---|
| 661 | return 0; |
---|
| 662 | } |
---|
| 663 | |
---|
| 664 | /* |
---|
| 665 | - range - supply cvec for a range, including legality check |
---|
| 666 | ^ static struct cvec *range(struct vars *, celt, celt, int); |
---|
| 667 | */ |
---|
| 668 | static struct cvec * |
---|
| 669 | range( |
---|
| 670 | struct vars *v, /* context */ |
---|
| 671 | celt a, /* range start */ |
---|
| 672 | celt b, /* range end, might equal a */ |
---|
| 673 | int cases) /* case-independent? */ |
---|
| 674 | { |
---|
| 675 | int nchrs; |
---|
| 676 | struct cvec *cv; |
---|
| 677 | celt c, lc, uc, tc; |
---|
| 678 | |
---|
| 679 | if (a != b && !before(a, b)) { |
---|
| 680 | ERR(REG_ERANGE); |
---|
| 681 | return NULL; |
---|
| 682 | } |
---|
| 683 | |
---|
| 684 | if (!cases) { /* easy version */ |
---|
| 685 | cv = getcvec(v, 0, 1); |
---|
| 686 | NOERRN(); |
---|
| 687 | addrange(cv, a, b); |
---|
| 688 | return cv; |
---|
| 689 | } |
---|
| 690 | |
---|
| 691 | /* |
---|
| 692 | * When case-independent, it's hard to decide when cvec ranges are usable, |
---|
| 693 | * so for now at least, we won't try. We allocate enough space for two |
---|
| 694 | * case variants plus a little extra for the two title case variants. |
---|
| 695 | */ |
---|
| 696 | |
---|
| 697 | nchrs = (b - a + 1)*2 + 4; |
---|
| 698 | |
---|
| 699 | cv = getcvec(v, nchrs, 0); |
---|
| 700 | NOERRN(); |
---|
| 701 | |
---|
| 702 | for (c=a; c<=b; c++) { |
---|
| 703 | addchr(cv, c); |
---|
| 704 | lc = Tcl_UniCharToLower((chr)c); |
---|
| 705 | uc = Tcl_UniCharToUpper((chr)c); |
---|
| 706 | tc = Tcl_UniCharToTitle((chr)c); |
---|
| 707 | if (c != lc) { |
---|
| 708 | addchr(cv, lc); |
---|
| 709 | } |
---|
| 710 | if (c != uc) { |
---|
| 711 | addchr(cv, uc); |
---|
| 712 | } |
---|
| 713 | if (c != tc && tc != uc) { |
---|
| 714 | addchr(cv, tc); |
---|
| 715 | } |
---|
| 716 | } |
---|
| 717 | |
---|
| 718 | return cv; |
---|
| 719 | } |
---|
| 720 | |
---|
| 721 | /* |
---|
| 722 | - before - is celt x before celt y, for purposes of range legality? |
---|
| 723 | ^ static int before(celt, celt); |
---|
| 724 | */ |
---|
| 725 | static int /* predicate */ |
---|
| 726 | before( |
---|
| 727 | celt x, celt y) /* collating elements */ |
---|
| 728 | { |
---|
| 729 | if (x < y) { |
---|
| 730 | return 1; |
---|
| 731 | } |
---|
| 732 | return 0; |
---|
| 733 | } |
---|
| 734 | |
---|
| 735 | /* |
---|
| 736 | - eclass - supply cvec for an equivalence class |
---|
| 737 | * Must include case counterparts on request. |
---|
| 738 | ^ static struct cvec *eclass(struct vars *, celt, int); |
---|
| 739 | */ |
---|
| 740 | static struct cvec * |
---|
| 741 | eclass( |
---|
| 742 | struct vars *v, /* context */ |
---|
| 743 | celt c, /* Collating element representing the |
---|
| 744 | * equivalence class. */ |
---|
| 745 | int cases) /* all cases? */ |
---|
| 746 | { |
---|
| 747 | struct cvec *cv; |
---|
| 748 | |
---|
| 749 | /* |
---|
| 750 | * Crude fake equivalence class for testing. |
---|
| 751 | */ |
---|
| 752 | |
---|
| 753 | if ((v->cflags®_FAKE) && c == 'x') { |
---|
| 754 | cv = getcvec(v, 4, 0); |
---|
| 755 | addchr(cv, (chr)'x'); |
---|
| 756 | addchr(cv, (chr)'y'); |
---|
| 757 | if (cases) { |
---|
| 758 | addchr(cv, (chr)'X'); |
---|
| 759 | addchr(cv, (chr)'Y'); |
---|
| 760 | } |
---|
| 761 | return cv; |
---|
| 762 | } |
---|
| 763 | |
---|
| 764 | /* |
---|
| 765 | * Otherwise, none. |
---|
| 766 | */ |
---|
| 767 | |
---|
| 768 | if (cases) { |
---|
| 769 | return allcases(v, c); |
---|
| 770 | } |
---|
| 771 | cv = getcvec(v, 1, 0); |
---|
| 772 | assert(cv != NULL); |
---|
| 773 | addchr(cv, (chr)c); |
---|
| 774 | return cv; |
---|
| 775 | } |
---|
| 776 | |
---|
| 777 | /* |
---|
| 778 | - cclass - supply cvec for a character class |
---|
| 779 | * Must include case counterparts on request. |
---|
| 780 | ^ static struct cvec *cclass(struct vars *, const chr *, const chr *, int); |
---|
| 781 | */ |
---|
| 782 | static struct cvec * |
---|
| 783 | cclass( |
---|
| 784 | struct vars *v, /* context */ |
---|
| 785 | const chr *startp, /* where the name starts */ |
---|
| 786 | const chr *endp, /* just past the end of the name */ |
---|
| 787 | int cases) /* case-independent? */ |
---|
| 788 | { |
---|
| 789 | size_t len; |
---|
| 790 | struct cvec *cv = NULL; |
---|
| 791 | Tcl_DString ds; |
---|
| 792 | const char *np; |
---|
| 793 | const char **namePtr; |
---|
| 794 | int i, index; |
---|
| 795 | |
---|
| 796 | /* |
---|
| 797 | * The following arrays define the valid character class names. |
---|
| 798 | */ |
---|
| 799 | |
---|
| 800 | static const char *classNames[] = { |
---|
| 801 | "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph", |
---|
| 802 | "lower", "print", "punct", "space", "upper", "xdigit", NULL |
---|
| 803 | }; |
---|
| 804 | |
---|
| 805 | enum classes { |
---|
| 806 | CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, |
---|
| 807 | CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT |
---|
| 808 | }; |
---|
| 809 | |
---|
| 810 | |
---|
| 811 | /* |
---|
| 812 | * Extract the class name |
---|
| 813 | */ |
---|
| 814 | |
---|
| 815 | len = endp - startp; |
---|
| 816 | Tcl_DStringInit(&ds); |
---|
| 817 | np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); |
---|
| 818 | |
---|
| 819 | /* |
---|
| 820 | * Remap lower and upper to alpha if the match is case insensitive. |
---|
| 821 | */ |
---|
| 822 | |
---|
| 823 | if (cases && len == 5 && (strncmp("lower", np, 5) == 0 |
---|
| 824 | || strncmp("upper", np, 5) == 0)) { |
---|
| 825 | np = "alpha"; |
---|
| 826 | } |
---|
| 827 | |
---|
| 828 | /* |
---|
| 829 | * Map the name to the corresponding enumerated value. |
---|
| 830 | */ |
---|
| 831 | |
---|
| 832 | index = -1; |
---|
| 833 | for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) { |
---|
| 834 | if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) { |
---|
| 835 | index = i; |
---|
| 836 | break; |
---|
| 837 | } |
---|
| 838 | } |
---|
| 839 | Tcl_DStringFree(&ds); |
---|
| 840 | if (index == -1) { |
---|
| 841 | ERR(REG_ECTYPE); |
---|
| 842 | return NULL; |
---|
| 843 | } |
---|
| 844 | |
---|
| 845 | /* |
---|
| 846 | * Now compute the character class contents. |
---|
| 847 | */ |
---|
| 848 | |
---|
| 849 | switch((enum classes) index) { |
---|
| 850 | case CC_PRINT: |
---|
| 851 | cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE); |
---|
| 852 | if (cv) { |
---|
| 853 | for (i=0 ; (size_t)i<NUM_PRINT_CHAR ; i++) { |
---|
| 854 | addchr(cv, printCharTable[i]); |
---|
| 855 | } |
---|
| 856 | for (i=0 ; (size_t)i<NUM_PRINT_RANGE ; i++) { |
---|
| 857 | addrange(cv, printRangeTable[i].start, |
---|
| 858 | printRangeTable[i].end); |
---|
| 859 | } |
---|
| 860 | } |
---|
| 861 | break; |
---|
| 862 | case CC_ALNUM: |
---|
| 863 | cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE); |
---|
| 864 | if (cv) { |
---|
| 865 | for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) { |
---|
| 866 | addchr(cv, alphaCharTable[i]); |
---|
| 867 | } |
---|
| 868 | for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) { |
---|
| 869 | addrange(cv, alphaRangeTable[i].start, |
---|
| 870 | alphaRangeTable[i].end); |
---|
| 871 | } |
---|
| 872 | for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) { |
---|
| 873 | addrange(cv, digitRangeTable[i].start, |
---|
| 874 | digitRangeTable[i].end); |
---|
| 875 | } |
---|
| 876 | } |
---|
| 877 | break; |
---|
| 878 | case CC_ALPHA: |
---|
| 879 | cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE); |
---|
| 880 | if (cv) { |
---|
| 881 | for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) { |
---|
| 882 | addrange(cv, alphaRangeTable[i].start, |
---|
| 883 | alphaRangeTable[i].end); |
---|
| 884 | } |
---|
| 885 | for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) { |
---|
| 886 | addchr(cv, alphaCharTable[i]); |
---|
| 887 | } |
---|
| 888 | } |
---|
| 889 | break; |
---|
| 890 | case CC_ASCII: |
---|
| 891 | cv = getcvec(v, 0, 1); |
---|
| 892 | if (cv) { |
---|
| 893 | addrange(cv, 0, 0x7f); |
---|
| 894 | } |
---|
| 895 | break; |
---|
| 896 | case CC_BLANK: |
---|
| 897 | cv = getcvec(v, 2, 0); |
---|
| 898 | addchr(cv, '\t'); |
---|
| 899 | addchr(cv, ' '); |
---|
| 900 | break; |
---|
| 901 | case CC_CNTRL: |
---|
| 902 | cv = getcvec(v, 0, 2); |
---|
| 903 | addrange(cv, 0x0, 0x1f); |
---|
| 904 | addrange(cv, 0x7f, 0x9f); |
---|
| 905 | break; |
---|
| 906 | case CC_DIGIT: |
---|
| 907 | cv = getcvec(v, 0, NUM_DIGIT_RANGE); |
---|
| 908 | if (cv) { |
---|
| 909 | for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) { |
---|
| 910 | addrange(cv, digitRangeTable[i].start, |
---|
| 911 | digitRangeTable[i].end); |
---|
| 912 | } |
---|
| 913 | } |
---|
| 914 | break; |
---|
| 915 | case CC_PUNCT: |
---|
| 916 | cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE); |
---|
| 917 | if (cv) { |
---|
| 918 | for (i=0 ; (size_t)i<NUM_PUNCT_RANGE ; i++) { |
---|
| 919 | addrange(cv, punctRangeTable[i].start, |
---|
| 920 | punctRangeTable[i].end); |
---|
| 921 | } |
---|
| 922 | for (i=0 ; (size_t)i<NUM_PUNCT_CHAR ; i++) { |
---|
| 923 | addchr(cv, punctCharTable[i]); |
---|
| 924 | } |
---|
| 925 | } |
---|
| 926 | break; |
---|
| 927 | case CC_XDIGIT: |
---|
| 928 | /* |
---|
| 929 | * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no idea how |
---|
| 930 | * to define the digits 'a' through 'f' in non-western locales. The |
---|
| 931 | * concept is quite possibly non portable, or only used in contextx |
---|
| 932 | * where the characters used would be the western ones anyway! |
---|
| 933 | * Whatever is actually the case, the number of ranges is fixed (until |
---|
| 934 | * someone comes up with a better arrangement!) |
---|
| 935 | */ |
---|
| 936 | |
---|
| 937 | cv = getcvec(v, 0, 3); |
---|
| 938 | if (cv) { |
---|
| 939 | addrange(cv, '0', '9'); |
---|
| 940 | addrange(cv, 'a', 'f'); |
---|
| 941 | addrange(cv, 'A', 'F'); |
---|
| 942 | } |
---|
| 943 | break; |
---|
| 944 | case CC_SPACE: |
---|
| 945 | cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE); |
---|
| 946 | if (cv) { |
---|
| 947 | for (i=0 ; (size_t)i<NUM_SPACE_RANGE ; i++) { |
---|
| 948 | addrange(cv, spaceRangeTable[i].start, |
---|
| 949 | spaceRangeTable[i].end); |
---|
| 950 | } |
---|
| 951 | for (i=0 ; (size_t)i<NUM_SPACE_CHAR ; i++) { |
---|
| 952 | addchr(cv, spaceCharTable[i]); |
---|
| 953 | } |
---|
| 954 | } |
---|
| 955 | break; |
---|
| 956 | case CC_LOWER: |
---|
| 957 | cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE); |
---|
| 958 | if (cv) { |
---|
| 959 | for (i=0 ; (size_t)i<NUM_LOWER_RANGE ; i++) { |
---|
| 960 | addrange(cv, lowerRangeTable[i].start, |
---|
| 961 | lowerRangeTable[i].end); |
---|
| 962 | } |
---|
| 963 | for (i=0 ; (size_t)i<NUM_LOWER_CHAR ; i++) { |
---|
| 964 | addchr(cv, lowerCharTable[i]); |
---|
| 965 | } |
---|
| 966 | } |
---|
| 967 | break; |
---|
| 968 | case CC_UPPER: |
---|
| 969 | cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE); |
---|
| 970 | if (cv) { |
---|
| 971 | for (i=0 ; (size_t)i<NUM_UPPER_RANGE ; i++) { |
---|
| 972 | addrange(cv, upperRangeTable[i].start, |
---|
| 973 | upperRangeTable[i].end); |
---|
| 974 | } |
---|
| 975 | for (i=0 ; (size_t)i<NUM_UPPER_CHAR ; i++) { |
---|
| 976 | addchr(cv, upperCharTable[i]); |
---|
| 977 | } |
---|
| 978 | } |
---|
| 979 | break; |
---|
| 980 | case CC_GRAPH: |
---|
| 981 | cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE); |
---|
| 982 | if (cv) { |
---|
| 983 | for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) { |
---|
| 984 | addrange(cv, graphRangeTable[i].start, |
---|
| 985 | graphRangeTable[i].end); |
---|
| 986 | } |
---|
| 987 | for (i=0 ; (size_t)i<NUM_GRAPH_CHAR ; i++) { |
---|
| 988 | addchr(cv, graphCharTable[i]); |
---|
| 989 | } |
---|
| 990 | } |
---|
| 991 | break; |
---|
| 992 | } |
---|
| 993 | if (cv == NULL) { |
---|
| 994 | ERR(REG_ESPACE); |
---|
| 995 | } |
---|
| 996 | return cv; |
---|
| 997 | } |
---|
| 998 | |
---|
| 999 | /* |
---|
| 1000 | - allcases - supply cvec for all case counterparts of a chr (including itself) |
---|
| 1001 | * This is a shortcut, preferably an efficient one, for simple characters; |
---|
| 1002 | * messy cases are done via range(). |
---|
| 1003 | ^ static struct cvec *allcases(struct vars *, pchr); |
---|
| 1004 | */ |
---|
| 1005 | static struct cvec * |
---|
| 1006 | allcases( |
---|
| 1007 | struct vars *v, /* context */ |
---|
| 1008 | pchr pc) /* character to get case equivs of */ |
---|
| 1009 | { |
---|
| 1010 | struct cvec *cv; |
---|
| 1011 | chr c = (chr)pc; |
---|
| 1012 | chr lc, uc, tc; |
---|
| 1013 | |
---|
| 1014 | lc = Tcl_UniCharToLower((chr)c); |
---|
| 1015 | uc = Tcl_UniCharToUpper((chr)c); |
---|
| 1016 | tc = Tcl_UniCharToTitle((chr)c); |
---|
| 1017 | |
---|
| 1018 | if (tc != uc) { |
---|
| 1019 | cv = getcvec(v, 3, 0); |
---|
| 1020 | addchr(cv, tc); |
---|
| 1021 | } else { |
---|
| 1022 | cv = getcvec(v, 2, 0); |
---|
| 1023 | } |
---|
| 1024 | addchr(cv, lc); |
---|
| 1025 | if (lc != uc) { |
---|
| 1026 | addchr(cv, uc); |
---|
| 1027 | } |
---|
| 1028 | return cv; |
---|
| 1029 | } |
---|
| 1030 | |
---|
| 1031 | /* |
---|
| 1032 | - cmp - chr-substring compare |
---|
| 1033 | * Backrefs need this. It should preferably be efficient. |
---|
| 1034 | * Note that it does not need to report anything except equal/unequal. |
---|
| 1035 | * Note also that the length is exact, and the comparison should not |
---|
| 1036 | * stop at embedded NULs! |
---|
| 1037 | ^ static int cmp(const chr *, const chr *, size_t); |
---|
| 1038 | */ |
---|
| 1039 | static int /* 0 for equal, nonzero for unequal */ |
---|
| 1040 | cmp( |
---|
| 1041 | const chr *x, const chr *y, /* strings to compare */ |
---|
| 1042 | size_t len) /* exact length of comparison */ |
---|
| 1043 | { |
---|
| 1044 | return memcmp(VS(x), VS(y), len*sizeof(chr)); |
---|
| 1045 | } |
---|
| 1046 | |
---|
| 1047 | /* |
---|
| 1048 | - casecmp - case-independent chr-substring compare |
---|
| 1049 | * REG_ICASE backrefs need this. It should preferably be efficient. |
---|
| 1050 | * Note that it does not need to report anything except equal/unequal. |
---|
| 1051 | * Note also that the length is exact, and the comparison should not |
---|
| 1052 | * stop at embedded NULs! |
---|
| 1053 | ^ static int casecmp(const chr *, const chr *, size_t); |
---|
| 1054 | */ |
---|
| 1055 | static int /* 0 for equal, nonzero for unequal */ |
---|
| 1056 | casecmp( |
---|
| 1057 | const chr *x, const chr *y, /* strings to compare */ |
---|
| 1058 | size_t len) /* exact length of comparison */ |
---|
| 1059 | { |
---|
| 1060 | for (; len > 0; len--, x++, y++) { |
---|
| 1061 | if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) { |
---|
| 1062 | return 1; |
---|
| 1063 | } |
---|
| 1064 | } |
---|
| 1065 | return 0; |
---|
| 1066 | } |
---|
| 1067 | |
---|
| 1068 | /* |
---|
| 1069 | * Local Variables: |
---|
| 1070 | * mode: c |
---|
| 1071 | * c-basic-offset: 4 |
---|
| 1072 | * fill-column: 78 |
---|
| 1073 | * End: |
---|
| 1074 | */ |
---|