[25] | 1 | # This file contains a collection of tests for tclUtf.c |
---|
| 2 | # Sourcing this file into Tcl runs the tests and generates output for |
---|
| 3 | # errors. No output means no errors were found. |
---|
| 4 | # |
---|
| 5 | # Copyright (c) 1997 Sun Microsystems, Inc. |
---|
| 6 | # Copyright (c) 1998-1999 by Scriptics Corporation. |
---|
| 7 | # |
---|
| 8 | # See the file "license.terms" for information on usage and redistribution |
---|
| 9 | # of this file, and for a DISCLAIMER OF ALL WARRANTIES. |
---|
| 10 | # |
---|
| 11 | # RCS: @(#) $Id: utf.test,v 1.14 2007/05/02 01:37:28 kennykb Exp $ |
---|
| 12 | |
---|
| 13 | if {[lsearch [namespace children] ::tcltest] == -1} { |
---|
| 14 | package require tcltest 2 |
---|
| 15 | namespace import -force ::tcltest::* |
---|
| 16 | } |
---|
| 17 | |
---|
| 18 | catch {unset x} |
---|
| 19 | |
---|
| 20 | test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} { |
---|
| 21 | set x \x01 |
---|
| 22 | } [bytestring "\x01"] |
---|
| 23 | test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} { |
---|
| 24 | set x "\x00" |
---|
| 25 | } [bytestring "\xc0\x80"] |
---|
| 26 | test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} { |
---|
| 27 | set x "\xe0" |
---|
| 28 | } [bytestring "\xc3\xa0"] |
---|
| 29 | test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} { |
---|
| 30 | set x "\u4e4e" |
---|
| 31 | } [bytestring "\xe4\xb9\x8e"] |
---|
| 32 | test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} { |
---|
| 33 | string length [format %c -1] |
---|
| 34 | } 1 |
---|
| 35 | |
---|
| 36 | test utf-2.1 {Tcl_UtfToUniChar: low ascii} { |
---|
| 37 | string length "abc" |
---|
| 38 | } {3} |
---|
| 39 | test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} { |
---|
| 40 | string length [bytestring "\x82\x83\x84"] |
---|
| 41 | } {3} |
---|
| 42 | test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} { |
---|
| 43 | string length [bytestring "\xC2"] |
---|
| 44 | } {1} |
---|
| 45 | test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} { |
---|
| 46 | string length [bytestring "\xC2\xa2"] |
---|
| 47 | } {1} |
---|
| 48 | test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} { |
---|
| 49 | string length [bytestring "\xE2"] |
---|
| 50 | } {1} |
---|
| 51 | test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} { |
---|
| 52 | string length [bytestring "\xE2\xA2"] |
---|
| 53 | } {2} |
---|
| 54 | test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} { |
---|
| 55 | string length [bytestring "\xE4\xb9\x8e"] |
---|
| 56 | } {1} |
---|
| 57 | test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} { |
---|
| 58 | string length [bytestring "\xF4\xA2\xA2\xA2"] |
---|
| 59 | } {4} |
---|
| 60 | |
---|
| 61 | test utf-3.1 {Tcl_UtfCharComplete} { |
---|
| 62 | } {} |
---|
| 63 | |
---|
| 64 | testConstraint testnumutfchars [llength [info commands testnumutfchars]] |
---|
| 65 | test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { |
---|
| 66 | testnumutfchars "" |
---|
| 67 | } {0} |
---|
| 68 | test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars { |
---|
| 69 | testnumutfchars [bytestring "\xC2\xA2"] |
---|
| 70 | } {1} |
---|
| 71 | test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars { |
---|
| 72 | testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] |
---|
| 73 | } {7} |
---|
| 74 | test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars { |
---|
| 75 | testnumutfchars [bytestring "\xC0\x80"] |
---|
| 76 | } {1} |
---|
| 77 | test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { |
---|
| 78 | testnumutfchars "" 1 |
---|
| 79 | } {0} |
---|
| 80 | test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars { |
---|
| 81 | testnumutfchars [bytestring "\xC2\xA2"] 1 |
---|
| 82 | } {1} |
---|
| 83 | test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars { |
---|
| 84 | testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1 |
---|
| 85 | } {7} |
---|
| 86 | test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars { |
---|
| 87 | testnumutfchars [bytestring "\xC0\x80"] 1 |
---|
| 88 | } {1} |
---|
| 89 | |
---|
| 90 | test utf-5.1 {Tcl_UtfFindFirsts} { |
---|
| 91 | } {} |
---|
| 92 | |
---|
| 93 | test utf-6.1 {Tcl_UtfNext} { |
---|
| 94 | } {} |
---|
| 95 | |
---|
| 96 | test utf-7.1 {Tcl_UtfPrev} { |
---|
| 97 | } {} |
---|
| 98 | |
---|
| 99 | test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { |
---|
| 100 | string index abcd 0 |
---|
| 101 | } {a} |
---|
| 102 | test utf-8.2 {Tcl_UniCharAtIndex: index = 0} { |
---|
| 103 | string index \u4e4e\u25a 0 |
---|
| 104 | } "\u4e4e" |
---|
| 105 | test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { |
---|
| 106 | string index abcd 2 |
---|
| 107 | } {c} |
---|
| 108 | test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { |
---|
| 109 | string index \u4e4e\u25a\xff\u543 2 |
---|
| 110 | } "\uff" |
---|
| 111 | |
---|
| 112 | test utf-9.1 {Tcl_UtfAtIndex: index = 0} { |
---|
| 113 | string range abcd 0 2 |
---|
| 114 | } {abc} |
---|
| 115 | test utf-9.2 {Tcl_UtfAtIndex: index > 0} { |
---|
| 116 | string range \u4e4e\u25a\xff\u543klmnop 1 5 |
---|
| 117 | } "\u25a\xff\u543kl" |
---|
| 118 | |
---|
| 119 | |
---|
| 120 | test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { |
---|
| 121 | set x \n |
---|
| 122 | } { |
---|
| 123 | } |
---|
| 124 | test utf-10.2 {Tcl_UtfBackslash: \u subst} { |
---|
| 125 | set x \ua2 |
---|
| 126 | } [bytestring "\xc2\xa2"] |
---|
| 127 | test utf-10.3 {Tcl_UtfBackslash: longer \u subst} { |
---|
| 128 | set x \u4e21 |
---|
| 129 | } [bytestring "\xe4\xb8\xa1"] |
---|
| 130 | test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} { |
---|
| 131 | set x \u4e2k |
---|
| 132 | } "[bytestring \xd3\xa2]k" |
---|
| 133 | test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} { |
---|
| 134 | set x \u4e216 |
---|
| 135 | } "[bytestring \xe4\xb8\xa1]6" |
---|
| 136 | proc bsCheck {char num} { |
---|
| 137 | global errNum |
---|
| 138 | test utf-10.$errNum {backslash substitution} { |
---|
| 139 | scan $char %c value |
---|
| 140 | set value |
---|
| 141 | } $num |
---|
| 142 | incr errNum |
---|
| 143 | } |
---|
| 144 | set errNum 6 |
---|
| 145 | bsCheck \b 8 |
---|
| 146 | bsCheck \e 101 |
---|
| 147 | bsCheck \f 12 |
---|
| 148 | bsCheck \n 10 |
---|
| 149 | bsCheck \r 13 |
---|
| 150 | bsCheck \t 9 |
---|
| 151 | bsCheck \v 11 |
---|
| 152 | bsCheck \{ 123 |
---|
| 153 | bsCheck \} 125 |
---|
| 154 | bsCheck \[ 91 |
---|
| 155 | bsCheck \] 93 |
---|
| 156 | bsCheck \$ 36 |
---|
| 157 | bsCheck \ 32 |
---|
| 158 | bsCheck \; 59 |
---|
| 159 | bsCheck \\ 92 |
---|
| 160 | bsCheck \Ca 67 |
---|
| 161 | bsCheck \Ma 77 |
---|
| 162 | bsCheck \CMa 67 |
---|
| 163 | # prior to 8.3, this returned 8, as \8 as accepted as an |
---|
| 164 | # octal value - but it isn't! [Bug: 3975] |
---|
| 165 | bsCheck \8a 56 |
---|
| 166 | bsCheck \14 12 |
---|
| 167 | bsCheck \141 97 |
---|
| 168 | bsCheck b\0 98 |
---|
| 169 | bsCheck \x 120 |
---|
| 170 | bsCheck \xa 10 |
---|
| 171 | bsCheck \xA 10 |
---|
| 172 | bsCheck \x41 65 |
---|
| 173 | bsCheck \x541 65 |
---|
| 174 | bsCheck \u 117 |
---|
| 175 | bsCheck \uk 117 |
---|
| 176 | bsCheck \u41 65 |
---|
| 177 | bsCheck \ua 10 |
---|
| 178 | bsCheck \uA 10 |
---|
| 179 | bsCheck \340 224 |
---|
| 180 | bsCheck \ua1 161 |
---|
| 181 | bsCheck \u4e21 20001 |
---|
| 182 | |
---|
| 183 | test utf-11.1 {Tcl_UtfToUpper} { |
---|
| 184 | string toupper {} |
---|
| 185 | } {} |
---|
| 186 | test utf-11.2 {Tcl_UtfToUpper} { |
---|
| 187 | string toupper abc |
---|
| 188 | } ABC |
---|
| 189 | test utf-11.3 {Tcl_UtfToUpper} { |
---|
| 190 | string toupper \u00e3ab |
---|
| 191 | } \u00c3AB |
---|
| 192 | test utf-11.4 {Tcl_UtfToUpper} { |
---|
| 193 | string toupper \u01e3ab |
---|
| 194 | } \u01e2AB |
---|
| 195 | |
---|
| 196 | test utf-12.1 {Tcl_UtfToLower} { |
---|
| 197 | string tolower {} |
---|
| 198 | } {} |
---|
| 199 | test utf-12.2 {Tcl_UtfToLower} { |
---|
| 200 | string tolower ABC |
---|
| 201 | } abc |
---|
| 202 | test utf-12.3 {Tcl_UtfToLower} { |
---|
| 203 | string tolower \u00c3AB |
---|
| 204 | } \u00e3ab |
---|
| 205 | test utf-12.4 {Tcl_UtfToLower} { |
---|
| 206 | string tolower \u01e2AB |
---|
| 207 | } \u01e3ab |
---|
| 208 | |
---|
| 209 | test utf-13.1 {Tcl_UtfToTitle} { |
---|
| 210 | string totitle {} |
---|
| 211 | } {} |
---|
| 212 | test utf-13.2 {Tcl_UtfToTitle} { |
---|
| 213 | string totitle abc |
---|
| 214 | } Abc |
---|
| 215 | test utf-13.3 {Tcl_UtfToTitle} { |
---|
| 216 | string totitle \u00e3ab |
---|
| 217 | } \u00c3ab |
---|
| 218 | test utf-13.4 {Tcl_UtfToTitle} { |
---|
| 219 | string totitle \u01f3ab |
---|
| 220 | } \u01f2ab |
---|
| 221 | |
---|
| 222 | test utf-14.1 {Tcl_UtfNcasecmp} { |
---|
| 223 | string compare -nocase a b |
---|
| 224 | } -1 |
---|
| 225 | test utf-14.2 {Tcl_UtfNcasecmp} { |
---|
| 226 | string compare -nocase b a |
---|
| 227 | } 1 |
---|
| 228 | test utf-14.3 {Tcl_UtfNcasecmp} { |
---|
| 229 | string compare -nocase B a |
---|
| 230 | } 1 |
---|
| 231 | test utf-14.4 {Tcl_UtfNcasecmp} { |
---|
| 232 | string compare -nocase aBcB abca |
---|
| 233 | } 1 |
---|
| 234 | |
---|
| 235 | test utf-15.1 {Tcl_UniCharToUpper, negative delta} { |
---|
| 236 | string toupper aA |
---|
| 237 | } AA |
---|
| 238 | test utf-15.2 {Tcl_UniCharToUpper, positive delta} { |
---|
| 239 | string toupper \u0178\u00ff |
---|
| 240 | } \u0178\u0178 |
---|
| 241 | test utf-15.3 {Tcl_UniCharToUpper, no delta} { |
---|
| 242 | string toupper ! |
---|
| 243 | } ! |
---|
| 244 | |
---|
| 245 | test utf-16.1 {Tcl_UniCharToLower, negative delta} { |
---|
| 246 | string tolower aA |
---|
| 247 | } aa |
---|
| 248 | test utf-16.2 {Tcl_UniCharToLower, positive delta} { |
---|
| 249 | string tolower \u0178\u00ff |
---|
| 250 | } \u00ff\u00ff |
---|
| 251 | test utf-17.1 {Tcl_UniCharToLower, no delta} { |
---|
| 252 | string tolower ! |
---|
| 253 | } ! |
---|
| 254 | |
---|
| 255 | test utf-18.1 {Tcl_UniCharToTitle, add one for title} { |
---|
| 256 | string totitle \u01c4 |
---|
| 257 | } \u01c5 |
---|
| 258 | test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} { |
---|
| 259 | string totitle \u01c6 |
---|
| 260 | } \u01c5 |
---|
| 261 | test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} { |
---|
| 262 | string totitle \u017f |
---|
| 263 | } \u0053 |
---|
| 264 | test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} { |
---|
| 265 | string totitle \u00ff |
---|
| 266 | } \u0178 |
---|
| 267 | test utf-18.5 {Tcl_UniCharToTitle, no delta} { |
---|
| 268 | string totitle ! |
---|
| 269 | } ! |
---|
| 270 | |
---|
| 271 | test utf-19.1 {TclUniCharLen} { |
---|
| 272 | list [regexp \\d abc456def foo] $foo |
---|
| 273 | } {1 4} |
---|
| 274 | |
---|
| 275 | test utf-20.1 {TclUniCharNcmp} { |
---|
| 276 | } {} |
---|
| 277 | |
---|
| 278 | test utf-21.1 {TclUniCharIsAlnum} { |
---|
| 279 | # this returns 1 with Unicode 3 compliance |
---|
| 280 | string is alnum \u1040\u021f |
---|
| 281 | } {1} |
---|
| 282 | test utf-21.2 {unicode alnum char in regc_locale.c} { |
---|
| 283 | # this returns 1 with Unicode 3 compliance |
---|
| 284 | list [regexp {^[[:alnum:]]+$} \u1040\u021f] [regexp {^\w+$} \u1040\u021f] |
---|
| 285 | } {1 1} |
---|
| 286 | |
---|
| 287 | test utf-22.1 {TclUniCharIsWordChar} { |
---|
| 288 | string wordend "xyz123_bar fg" 0 |
---|
| 289 | } 10 |
---|
| 290 | test utf-22.2 {TclUniCharIsWordChar} { |
---|
| 291 | string wordend "x\u5080z123_bar\u203c fg" 0 |
---|
| 292 | } 10 |
---|
| 293 | |
---|
| 294 | test utf-23.1 {TclUniCharIsAlpha} { |
---|
| 295 | # this returns 1 with Unicode 3 compliance |
---|
| 296 | string is alpha \u021f |
---|
| 297 | } {1} |
---|
| 298 | test utf-23.2 {unicode alpha char in regc_locale.c} { |
---|
| 299 | # this returns 1 with Unicode 3 compliance |
---|
| 300 | regexp {^[[:alpha:]]+$} \u021f |
---|
| 301 | } {1} |
---|
| 302 | |
---|
| 303 | test utf-24.1 {TclUniCharIsDigit} { |
---|
| 304 | # this returns 1 with Unicode 3 compliance |
---|
| 305 | string is digit \u1040 |
---|
| 306 | } {1} |
---|
| 307 | test utf-24.2 {unicode digit char in regc_locale.c} { |
---|
| 308 | # this returns 1 with Unicode 3 compliance |
---|
| 309 | list [regexp {^[[:digit:]]+$} \u1040] [regexp {^\d+$} \u1040] |
---|
| 310 | } {1 1} |
---|
| 311 | |
---|
| 312 | test utf-24.3 {TclUniCharIsSpace} { |
---|
| 313 | # this returns 1 with Unicode 3 compliance |
---|
| 314 | string is space \u1680 |
---|
| 315 | } {1} |
---|
| 316 | test utf-24.4 {unicode space char in regc_locale.c} { |
---|
| 317 | # this returns 1 with Unicode 3 compliance |
---|
| 318 | list [regexp {^[[:space:]]+$} \u1680] [regexp {^\s+$} \u1680] |
---|
| 319 | } {1 1} |
---|
| 320 | |
---|
| 321 | testConstraint teststringobj [llength [info commands teststringobj]] |
---|
| 322 | |
---|
| 323 | test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \ |
---|
| 324 | -setup { |
---|
| 325 | testobj freeallvars |
---|
| 326 | } \ |
---|
| 327 | -body { |
---|
| 328 | teststringobj set 1 a |
---|
| 329 | teststringobj set 2 b |
---|
| 330 | teststringobj getunicode 1 |
---|
| 331 | teststringobj getunicode 2 |
---|
| 332 | string compare -nocase [teststringobj get 1] [teststringobj get 2] |
---|
| 333 | } \ |
---|
| 334 | -cleanup { |
---|
| 335 | testobj freeallvars |
---|
| 336 | } \ |
---|
| 337 | -result -1 |
---|
| 338 | test utf-25.2 {Tcl_UniCharNcasecmp} -constraints teststringobj \ |
---|
| 339 | -setup { |
---|
| 340 | testobj freeallvars |
---|
| 341 | } \ |
---|
| 342 | -body { |
---|
| 343 | teststringobj set 1 b |
---|
| 344 | teststringobj set 2 a |
---|
| 345 | teststringobj getunicode 1 |
---|
| 346 | teststringobj getunicode 2 |
---|
| 347 | string compare -nocase [teststringobj get 1] [teststringobj get 2] |
---|
| 348 | } \ |
---|
| 349 | -cleanup { |
---|
| 350 | testobj freeallvars |
---|
| 351 | } \ |
---|
| 352 | -result 1 |
---|
| 353 | test utf-25.3 {Tcl_UniCharNcasecmp} -constraints teststringobj \ |
---|
| 354 | -setup { |
---|
| 355 | testobj freeallvars |
---|
| 356 | } \ |
---|
| 357 | -body { |
---|
| 358 | teststringobj set 1 B |
---|
| 359 | teststringobj set 2 a |
---|
| 360 | teststringobj getunicode 1 |
---|
| 361 | teststringobj getunicode 2 |
---|
| 362 | string compare -nocase [teststringobj get 1] [teststringobj get 2] |
---|
| 363 | } \ |
---|
| 364 | -cleanup { |
---|
| 365 | testobj freeallvars |
---|
| 366 | } \ |
---|
| 367 | -result 1 |
---|
| 368 | |
---|
| 369 | test utf-25.4 {Tcl_UniCharNcasecmp} -constraints teststringobj \ |
---|
| 370 | -setup { |
---|
| 371 | testobj freeallvars |
---|
| 372 | } \ |
---|
| 373 | -body { |
---|
| 374 | teststringobj set 1 aBcB |
---|
| 375 | teststringobj set 2 abca |
---|
| 376 | teststringobj getunicode 1 |
---|
| 377 | teststringobj getunicode 2 |
---|
| 378 | string compare -nocase [teststringobj get 1] [teststringobj get 2] |
---|
| 379 | } \ |
---|
| 380 | -cleanup { |
---|
| 381 | testobj freeallvars |
---|
| 382 | } \ |
---|
| 383 | -result 1 |
---|
| 384 | |
---|
| 385 | # cleanup |
---|
| 386 | ::tcltest::cleanupTests |
---|
| 387 | return |
---|
| 388 | |
---|
| 389 | # Local Variables: |
---|
| 390 | # mode: tcl |
---|
| 391 | # End: |
---|