| [25] | 1 | # This file contains a collection of tests for tclUtf.c |
|---|
| 2 | # Sourcing this file into Tcl runs the tests and generates output for |
|---|
| 3 | # errors. No output means no errors were found. |
|---|
| 4 | # |
|---|
| 5 | # Copyright (c) 1997 Sun Microsystems, Inc. |
|---|
| 6 | # Copyright (c) 1998-1999 by Scriptics Corporation. |
|---|
| 7 | # |
|---|
| 8 | # See the file "license.terms" for information on usage and redistribution |
|---|
| 9 | # of this file, and for a DISCLAIMER OF ALL WARRANTIES. |
|---|
| 10 | # |
|---|
| 11 | # RCS: @(#) $Id: utf.test,v 1.14 2007/05/02 01:37:28 kennykb Exp $ |
|---|
| 12 | |
|---|
| 13 | if {[lsearch [namespace children] ::tcltest] == -1} { |
|---|
| 14 | package require tcltest 2 |
|---|
| 15 | namespace import -force ::tcltest::* |
|---|
| 16 | } |
|---|
| 17 | |
|---|
| 18 | catch {unset x} |
|---|
| 19 | |
|---|
| 20 | test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} { |
|---|
| 21 | set x \x01 |
|---|
| 22 | } [bytestring "\x01"] |
|---|
| 23 | test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} { |
|---|
| 24 | set x "\x00" |
|---|
| 25 | } [bytestring "\xc0\x80"] |
|---|
| 26 | test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} { |
|---|
| 27 | set x "\xe0" |
|---|
| 28 | } [bytestring "\xc3\xa0"] |
|---|
| 29 | test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} { |
|---|
| 30 | set x "\u4e4e" |
|---|
| 31 | } [bytestring "\xe4\xb9\x8e"] |
|---|
| 32 | test utf-1.5 {Tcl_UniCharToUtf: negative Tcl_UniChar} { |
|---|
| 33 | string length [format %c -1] |
|---|
| 34 | } 1 |
|---|
| 35 | |
|---|
| 36 | test utf-2.1 {Tcl_UtfToUniChar: low ascii} { |
|---|
| 37 | string length "abc" |
|---|
| 38 | } {3} |
|---|
| 39 | test utf-2.2 {Tcl_UtfToUniChar: naked trail bytes} { |
|---|
| 40 | string length [bytestring "\x82\x83\x84"] |
|---|
| 41 | } {3} |
|---|
| 42 | test utf-2.3 {Tcl_UtfToUniChar: lead (2-byte) followed by non-trail} { |
|---|
| 43 | string length [bytestring "\xC2"] |
|---|
| 44 | } {1} |
|---|
| 45 | test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} { |
|---|
| 46 | string length [bytestring "\xC2\xa2"] |
|---|
| 47 | } {1} |
|---|
| 48 | test utf-2.5 {Tcl_UtfToUniChar: lead (3-byte) followed by non-trail} { |
|---|
| 49 | string length [bytestring "\xE2"] |
|---|
| 50 | } {1} |
|---|
| 51 | test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} { |
|---|
| 52 | string length [bytestring "\xE2\xA2"] |
|---|
| 53 | } {2} |
|---|
| 54 | test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} { |
|---|
| 55 | string length [bytestring "\xE4\xb9\x8e"] |
|---|
| 56 | } {1} |
|---|
| 57 | test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} { |
|---|
| 58 | string length [bytestring "\xF4\xA2\xA2\xA2"] |
|---|
| 59 | } {4} |
|---|
| 60 | |
|---|
| 61 | test utf-3.1 {Tcl_UtfCharComplete} { |
|---|
| 62 | } {} |
|---|
| 63 | |
|---|
| 64 | testConstraint testnumutfchars [llength [info commands testnumutfchars]] |
|---|
| 65 | test utf-4.1 {Tcl_NumUtfChars: zero length} testnumutfchars { |
|---|
| 66 | testnumutfchars "" |
|---|
| 67 | } {0} |
|---|
| 68 | test utf-4.2 {Tcl_NumUtfChars: length 1} testnumutfchars { |
|---|
| 69 | testnumutfchars [bytestring "\xC2\xA2"] |
|---|
| 70 | } {1} |
|---|
| 71 | test utf-4.3 {Tcl_NumUtfChars: long string} testnumutfchars { |
|---|
| 72 | testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] |
|---|
| 73 | } {7} |
|---|
| 74 | test utf-4.4 {Tcl_NumUtfChars: #u0000} testnumutfchars { |
|---|
| 75 | testnumutfchars [bytestring "\xC0\x80"] |
|---|
| 76 | } {1} |
|---|
| 77 | test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} testnumutfchars { |
|---|
| 78 | testnumutfchars "" 1 |
|---|
| 79 | } {0} |
|---|
| 80 | test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} testnumutfchars { |
|---|
| 81 | testnumutfchars [bytestring "\xC2\xA2"] 1 |
|---|
| 82 | } {1} |
|---|
| 83 | test utf-4.7 {Tcl_NumUtfChars: long string, calc len} testnumutfchars { |
|---|
| 84 | testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1 |
|---|
| 85 | } {7} |
|---|
| 86 | test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} testnumutfchars { |
|---|
| 87 | testnumutfchars [bytestring "\xC0\x80"] 1 |
|---|
| 88 | } {1} |
|---|
| 89 | |
|---|
| 90 | test utf-5.1 {Tcl_UtfFindFirsts} { |
|---|
| 91 | } {} |
|---|
| 92 | |
|---|
| 93 | test utf-6.1 {Tcl_UtfNext} { |
|---|
| 94 | } {} |
|---|
| 95 | |
|---|
| 96 | test utf-7.1 {Tcl_UtfPrev} { |
|---|
| 97 | } {} |
|---|
| 98 | |
|---|
| 99 | test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { |
|---|
| 100 | string index abcd 0 |
|---|
| 101 | } {a} |
|---|
| 102 | test utf-8.2 {Tcl_UniCharAtIndex: index = 0} { |
|---|
| 103 | string index \u4e4e\u25a 0 |
|---|
| 104 | } "\u4e4e" |
|---|
| 105 | test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { |
|---|
| 106 | string index abcd 2 |
|---|
| 107 | } {c} |
|---|
| 108 | test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { |
|---|
| 109 | string index \u4e4e\u25a\xff\u543 2 |
|---|
| 110 | } "\uff" |
|---|
| 111 | |
|---|
| 112 | test utf-9.1 {Tcl_UtfAtIndex: index = 0} { |
|---|
| 113 | string range abcd 0 2 |
|---|
| 114 | } {abc} |
|---|
| 115 | test utf-9.2 {Tcl_UtfAtIndex: index > 0} { |
|---|
| 116 | string range \u4e4e\u25a\xff\u543klmnop 1 5 |
|---|
| 117 | } "\u25a\xff\u543kl" |
|---|
| 118 | |
|---|
| 119 | |
|---|
| 120 | test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { |
|---|
| 121 | set x \n |
|---|
| 122 | } { |
|---|
| 123 | } |
|---|
| 124 | test utf-10.2 {Tcl_UtfBackslash: \u subst} { |
|---|
| 125 | set x \ua2 |
|---|
| 126 | } [bytestring "\xc2\xa2"] |
|---|
| 127 | test utf-10.3 {Tcl_UtfBackslash: longer \u subst} { |
|---|
| 128 | set x \u4e21 |
|---|
| 129 | } [bytestring "\xe4\xb8\xa1"] |
|---|
| 130 | test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} { |
|---|
| 131 | set x \u4e2k |
|---|
| 132 | } "[bytestring \xd3\xa2]k" |
|---|
| 133 | test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} { |
|---|
| 134 | set x \u4e216 |
|---|
| 135 | } "[bytestring \xe4\xb8\xa1]6" |
|---|
| 136 | proc bsCheck {char num} { |
|---|
| 137 | global errNum |
|---|
| 138 | test utf-10.$errNum {backslash substitution} { |
|---|
| 139 | scan $char %c value |
|---|
| 140 | set value |
|---|
| 141 | } $num |
|---|
| 142 | incr errNum |
|---|
| 143 | } |
|---|
| 144 | set errNum 6 |
|---|
| 145 | bsCheck \b 8 |
|---|
| 146 | bsCheck \e 101 |
|---|
| 147 | bsCheck \f 12 |
|---|
| 148 | bsCheck \n 10 |
|---|
| 149 | bsCheck \r 13 |
|---|
| 150 | bsCheck \t 9 |
|---|
| 151 | bsCheck \v 11 |
|---|
| 152 | bsCheck \{ 123 |
|---|
| 153 | bsCheck \} 125 |
|---|
| 154 | bsCheck \[ 91 |
|---|
| 155 | bsCheck \] 93 |
|---|
| 156 | bsCheck \$ 36 |
|---|
| 157 | bsCheck \ 32 |
|---|
| 158 | bsCheck \; 59 |
|---|
| 159 | bsCheck \\ 92 |
|---|
| 160 | bsCheck \Ca 67 |
|---|
| 161 | bsCheck \Ma 77 |
|---|
| 162 | bsCheck \CMa 67 |
|---|
| 163 | # prior to 8.3, this returned 8, as \8 as accepted as an |
|---|
| 164 | # octal value - but it isn't! [Bug: 3975] |
|---|
| 165 | bsCheck \8a 56 |
|---|
| 166 | bsCheck \14 12 |
|---|
| 167 | bsCheck \141 97 |
|---|
| 168 | bsCheck b\0 98 |
|---|
| 169 | bsCheck \x 120 |
|---|
| 170 | bsCheck \xa 10 |
|---|
| 171 | bsCheck \xA 10 |
|---|
| 172 | bsCheck \x41 65 |
|---|
| 173 | bsCheck \x541 65 |
|---|
| 174 | bsCheck \u 117 |
|---|
| 175 | bsCheck \uk 117 |
|---|
| 176 | bsCheck \u41 65 |
|---|
| 177 | bsCheck \ua 10 |
|---|
| 178 | bsCheck \uA 10 |
|---|
| 179 | bsCheck \340 224 |
|---|
| 180 | bsCheck \ua1 161 |
|---|
| 181 | bsCheck \u4e21 20001 |
|---|
| 182 | |
|---|
| 183 | test utf-11.1 {Tcl_UtfToUpper} { |
|---|
| 184 | string toupper {} |
|---|
| 185 | } {} |
|---|
| 186 | test utf-11.2 {Tcl_UtfToUpper} { |
|---|
| 187 | string toupper abc |
|---|
| 188 | } ABC |
|---|
| 189 | test utf-11.3 {Tcl_UtfToUpper} { |
|---|
| 190 | string toupper \u00e3ab |
|---|
| 191 | } \u00c3AB |
|---|
| 192 | test utf-11.4 {Tcl_UtfToUpper} { |
|---|
| 193 | string toupper \u01e3ab |
|---|
| 194 | } \u01e2AB |
|---|
| 195 | |
|---|
| 196 | test utf-12.1 {Tcl_UtfToLower} { |
|---|
| 197 | string tolower {} |
|---|
| 198 | } {} |
|---|
| 199 | test utf-12.2 {Tcl_UtfToLower} { |
|---|
| 200 | string tolower ABC |
|---|
| 201 | } abc |
|---|
| 202 | test utf-12.3 {Tcl_UtfToLower} { |
|---|
| 203 | string tolower \u00c3AB |
|---|
| 204 | } \u00e3ab |
|---|
| 205 | test utf-12.4 {Tcl_UtfToLower} { |
|---|
| 206 | string tolower \u01e2AB |
|---|
| 207 | } \u01e3ab |
|---|
| 208 | |
|---|
| 209 | test utf-13.1 {Tcl_UtfToTitle} { |
|---|
| 210 | string totitle {} |
|---|
| 211 | } {} |
|---|
| 212 | test utf-13.2 {Tcl_UtfToTitle} { |
|---|
| 213 | string totitle abc |
|---|
| 214 | } Abc |
|---|
| 215 | test utf-13.3 {Tcl_UtfToTitle} { |
|---|
| 216 | string totitle \u00e3ab |
|---|
| 217 | } \u00c3ab |
|---|
| 218 | test utf-13.4 {Tcl_UtfToTitle} { |
|---|
| 219 | string totitle \u01f3ab |
|---|
| 220 | } \u01f2ab |
|---|
| 221 | |
|---|
| 222 | test utf-14.1 {Tcl_UtfNcasecmp} { |
|---|
| 223 | string compare -nocase a b |
|---|
| 224 | } -1 |
|---|
| 225 | test utf-14.2 {Tcl_UtfNcasecmp} { |
|---|
| 226 | string compare -nocase b a |
|---|
| 227 | } 1 |
|---|
| 228 | test utf-14.3 {Tcl_UtfNcasecmp} { |
|---|
| 229 | string compare -nocase B a |
|---|
| 230 | } 1 |
|---|
| 231 | test utf-14.4 {Tcl_UtfNcasecmp} { |
|---|
| 232 | string compare -nocase aBcB abca |
|---|
| 233 | } 1 |
|---|
| 234 | |
|---|
| 235 | test utf-15.1 {Tcl_UniCharToUpper, negative delta} { |
|---|
| 236 | string toupper aA |
|---|
| 237 | } AA |
|---|
| 238 | test utf-15.2 {Tcl_UniCharToUpper, positive delta} { |
|---|
| 239 | string toupper \u0178\u00ff |
|---|
| 240 | } \u0178\u0178 |
|---|
| 241 | test utf-15.3 {Tcl_UniCharToUpper, no delta} { |
|---|
| 242 | string toupper ! |
|---|
| 243 | } ! |
|---|
| 244 | |
|---|
| 245 | test utf-16.1 {Tcl_UniCharToLower, negative delta} { |
|---|
| 246 | string tolower aA |
|---|
| 247 | } aa |
|---|
| 248 | test utf-16.2 {Tcl_UniCharToLower, positive delta} { |
|---|
| 249 | string tolower \u0178\u00ff |
|---|
| 250 | } \u00ff\u00ff |
|---|
| 251 | test utf-17.1 {Tcl_UniCharToLower, no delta} { |
|---|
| 252 | string tolower ! |
|---|
| 253 | } ! |
|---|
| 254 | |
|---|
| 255 | test utf-18.1 {Tcl_UniCharToTitle, add one for title} { |
|---|
| 256 | string totitle \u01c4 |
|---|
| 257 | } \u01c5 |
|---|
| 258 | test utf-18.2 {Tcl_UniCharToTitle, subtract one for title} { |
|---|
| 259 | string totitle \u01c6 |
|---|
| 260 | } \u01c5 |
|---|
| 261 | test utf-18.3 {Tcl_UniCharToTitle, subtract delta for title (positive)} { |
|---|
| 262 | string totitle \u017f |
|---|
| 263 | } \u0053 |
|---|
| 264 | test utf-18.4 {Tcl_UniCharToTitle, subtract delta for title (negative)} { |
|---|
| 265 | string totitle \u00ff |
|---|
| 266 | } \u0178 |
|---|
| 267 | test utf-18.5 {Tcl_UniCharToTitle, no delta} { |
|---|
| 268 | string totitle ! |
|---|
| 269 | } ! |
|---|
| 270 | |
|---|
| 271 | test utf-19.1 {TclUniCharLen} { |
|---|
| 272 | list [regexp \\d abc456def foo] $foo |
|---|
| 273 | } {1 4} |
|---|
| 274 | |
|---|
| 275 | test utf-20.1 {TclUniCharNcmp} { |
|---|
| 276 | } {} |
|---|
| 277 | |
|---|
| 278 | test utf-21.1 {TclUniCharIsAlnum} { |
|---|
| 279 | # this returns 1 with Unicode 3 compliance |
|---|
| 280 | string is alnum \u1040\u021f |
|---|
| 281 | } {1} |
|---|
| 282 | test utf-21.2 {unicode alnum char in regc_locale.c} { |
|---|
| 283 | # this returns 1 with Unicode 3 compliance |
|---|
| 284 | list [regexp {^[[:alnum:]]+$} \u1040\u021f] [regexp {^\w+$} \u1040\u021f] |
|---|
| 285 | } {1 1} |
|---|
| 286 | |
|---|
| 287 | test utf-22.1 {TclUniCharIsWordChar} { |
|---|
| 288 | string wordend "xyz123_bar fg" 0 |
|---|
| 289 | } 10 |
|---|
| 290 | test utf-22.2 {TclUniCharIsWordChar} { |
|---|
| 291 | string wordend "x\u5080z123_bar\u203c fg" 0 |
|---|
| 292 | } 10 |
|---|
| 293 | |
|---|
| 294 | test utf-23.1 {TclUniCharIsAlpha} { |
|---|
| 295 | # this returns 1 with Unicode 3 compliance |
|---|
| 296 | string is alpha \u021f |
|---|
| 297 | } {1} |
|---|
| 298 | test utf-23.2 {unicode alpha char in regc_locale.c} { |
|---|
| 299 | # this returns 1 with Unicode 3 compliance |
|---|
| 300 | regexp {^[[:alpha:]]+$} \u021f |
|---|
| 301 | } {1} |
|---|
| 302 | |
|---|
| 303 | test utf-24.1 {TclUniCharIsDigit} { |
|---|
| 304 | # this returns 1 with Unicode 3 compliance |
|---|
| 305 | string is digit \u1040 |
|---|
| 306 | } {1} |
|---|
| 307 | test utf-24.2 {unicode digit char in regc_locale.c} { |
|---|
| 308 | # this returns 1 with Unicode 3 compliance |
|---|
| 309 | list [regexp {^[[:digit:]]+$} \u1040] [regexp {^\d+$} \u1040] |
|---|
| 310 | } {1 1} |
|---|
| 311 | |
|---|
| 312 | test utf-24.3 {TclUniCharIsSpace} { |
|---|
| 313 | # this returns 1 with Unicode 3 compliance |
|---|
| 314 | string is space \u1680 |
|---|
| 315 | } {1} |
|---|
| 316 | test utf-24.4 {unicode space char in regc_locale.c} { |
|---|
| 317 | # this returns 1 with Unicode 3 compliance |
|---|
| 318 | list [regexp {^[[:space:]]+$} \u1680] [regexp {^\s+$} \u1680] |
|---|
| 319 | } {1 1} |
|---|
| 320 | |
|---|
| 321 | testConstraint teststringobj [llength [info commands teststringobj]] |
|---|
| 322 | |
|---|
| 323 | test utf-25.1 {Tcl_UniCharNcasecmp} -constraints teststringobj \ |
|---|
| 324 | -setup { |
|---|
| 325 | testobj freeallvars |
|---|
| 326 | } \ |
|---|
| 327 | -body { |
|---|
| 328 | teststringobj set 1 a |
|---|
| 329 | teststringobj set 2 b |
|---|
| 330 | teststringobj getunicode 1 |
|---|
| 331 | teststringobj getunicode 2 |
|---|
| 332 | string compare -nocase [teststringobj get 1] [teststringobj get 2] |
|---|
| 333 | } \ |
|---|
| 334 | -cleanup { |
|---|
| 335 | testobj freeallvars |
|---|
| 336 | } \ |
|---|
| 337 | -result -1 |
|---|
| 338 | test utf-25.2 {Tcl_UniCharNcasecmp} -constraints teststringobj \ |
|---|
| 339 | -setup { |
|---|
| 340 | testobj freeallvars |
|---|
| 341 | } \ |
|---|
| 342 | -body { |
|---|
| 343 | teststringobj set 1 b |
|---|
| 344 | teststringobj set 2 a |
|---|
| 345 | teststringobj getunicode 1 |
|---|
| 346 | teststringobj getunicode 2 |
|---|
| 347 | string compare -nocase [teststringobj get 1] [teststringobj get 2] |
|---|
| 348 | } \ |
|---|
| 349 | -cleanup { |
|---|
| 350 | testobj freeallvars |
|---|
| 351 | } \ |
|---|
| 352 | -result 1 |
|---|
| 353 | test utf-25.3 {Tcl_UniCharNcasecmp} -constraints teststringobj \ |
|---|
| 354 | -setup { |
|---|
| 355 | testobj freeallvars |
|---|
| 356 | } \ |
|---|
| 357 | -body { |
|---|
| 358 | teststringobj set 1 B |
|---|
| 359 | teststringobj set 2 a |
|---|
| 360 | teststringobj getunicode 1 |
|---|
| 361 | teststringobj getunicode 2 |
|---|
| 362 | string compare -nocase [teststringobj get 1] [teststringobj get 2] |
|---|
| 363 | } \ |
|---|
| 364 | -cleanup { |
|---|
| 365 | testobj freeallvars |
|---|
| 366 | } \ |
|---|
| 367 | -result 1 |
|---|
| 368 | |
|---|
| 369 | test utf-25.4 {Tcl_UniCharNcasecmp} -constraints teststringobj \ |
|---|
| 370 | -setup { |
|---|
| 371 | testobj freeallvars |
|---|
| 372 | } \ |
|---|
| 373 | -body { |
|---|
| 374 | teststringobj set 1 aBcB |
|---|
| 375 | teststringobj set 2 abca |
|---|
| 376 | teststringobj getunicode 1 |
|---|
| 377 | teststringobj getunicode 2 |
|---|
| 378 | string compare -nocase [teststringobj get 1] [teststringobj get 2] |
|---|
| 379 | } \ |
|---|
| 380 | -cleanup { |
|---|
| 381 | testobj freeallvars |
|---|
| 382 | } \ |
|---|
| 383 | -result 1 |
|---|
| 384 | |
|---|
| 385 | # cleanup |
|---|
| 386 | ::tcltest::cleanupTests |
|---|
| 387 | return |
|---|
| 388 | |
|---|
| 389 | # Local Variables: |
|---|
| 390 | # mode: tcl |
|---|
| 391 | # End: |
|---|