[25] | 1 | #!/bin/sh |
---|
| 2 | # The next line is executed by /bin/sh, but not tcl \ |
---|
| 3 | exec tclsh "$0" ${1+"$@"} |
---|
| 4 | |
---|
| 5 | # |
---|
| 6 | # uniClass.tcl -- |
---|
| 7 | # |
---|
| 8 | # Generates the character ranges and singletons that are used in |
---|
| 9 | # generic/regc_locale.c for translation of character classes. |
---|
| 10 | # This file must be generated using a tclsh that contains the |
---|
| 11 | # correct corresponding tclUniData.c file (generated by uniParse.tcl) |
---|
| 12 | # in order for the class ranges to match. |
---|
| 13 | # |
---|
| 14 | |
---|
| 15 | proc emitRange {first last} { |
---|
| 16 | global ranges numranges chars numchars |
---|
| 17 | |
---|
| 18 | if {$first < ($last-1)} { |
---|
| 19 | append ranges [format "{0x%04x, 0x%04x}, " \ |
---|
| 20 | $first $last] |
---|
| 21 | if {[incr numranges] % 4 == 0} { |
---|
| 22 | append ranges "\n " |
---|
| 23 | } |
---|
| 24 | } else { |
---|
| 25 | append chars [format "0x%04x, " $first] |
---|
| 26 | incr numchars |
---|
| 27 | if {$numchars % 9 == 0} { |
---|
| 28 | append chars "\n " |
---|
| 29 | } |
---|
| 30 | if {$first != $last} { |
---|
| 31 | append chars [format "0x%04x, " $last] |
---|
| 32 | incr numchars |
---|
| 33 | if {$numchars % 9 == 0} { |
---|
| 34 | append chars "\n " |
---|
| 35 | } |
---|
| 36 | } |
---|
| 37 | } |
---|
| 38 | } |
---|
| 39 | |
---|
| 40 | proc genTable {type} { |
---|
| 41 | global first last ranges numranges chars numchars |
---|
| 42 | set first -2 |
---|
| 43 | set last -2 |
---|
| 44 | |
---|
| 45 | set ranges " " |
---|
| 46 | set numranges 0 |
---|
| 47 | set chars " " |
---|
| 48 | set numchars 0 |
---|
| 49 | |
---|
| 50 | for {set i 0} {$i <= 0xFFFF} {incr i} { |
---|
| 51 | if {[string is $type [format %c $i]]} { |
---|
| 52 | if {$i == ($last + 1)} { |
---|
| 53 | set last $i |
---|
| 54 | } else { |
---|
| 55 | if {$first > 0} { |
---|
| 56 | emitRange $first $last |
---|
| 57 | } |
---|
| 58 | set first $i |
---|
| 59 | set last $i |
---|
| 60 | } |
---|
| 61 | } |
---|
| 62 | } |
---|
| 63 | emitRange $first $last |
---|
| 64 | |
---|
| 65 | set ranges [string trimright $ranges "\t\n ,"] |
---|
| 66 | set chars [string trimright $chars "\t\n ,"] |
---|
| 67 | if {$ranges != ""} { |
---|
| 68 | puts "static crange ${type}RangeTable\[\] = {\n$ranges\n};\n" |
---|
| 69 | puts "#define NUM_[string toupper $type]_RANGE (sizeof(${type}RangeTable)/sizeof(crange))\n" |
---|
| 70 | } else { |
---|
| 71 | puts "/* no contiguous ranges of $type characters */\n" |
---|
| 72 | } |
---|
| 73 | if {$chars != ""} { |
---|
| 74 | puts "static chr ${type}CharTable\[\] = {\n$chars\n};\n" |
---|
| 75 | puts "#define NUM_[string toupper $type]_CHAR (sizeof(${type}CharTable)/sizeof(chr))\n" |
---|
| 76 | } else { |
---|
| 77 | puts "/* no singletons of $type characters */\n" |
---|
| 78 | } |
---|
| 79 | } |
---|
| 80 | |
---|
| 81 | puts "/* |
---|
| 82 | * Declarations of Unicode character ranges. This code |
---|
| 83 | * is automatically generated by the tools/uniClass.tcl script |
---|
| 84 | * and used in generic/regc_locale.c. Do not modify by hand. |
---|
| 85 | */ |
---|
| 86 | " |
---|
| 87 | |
---|
| 88 | foreach {type desc} { |
---|
| 89 | alpha "alphabetic characters" |
---|
| 90 | digit "decimal digit characters" |
---|
| 91 | punct "punctuation characters" |
---|
| 92 | space "white space characters" |
---|
| 93 | lower "lowercase characters" |
---|
| 94 | upper "uppercase characters" |
---|
| 95 | graph "unicode print characters excluding space" |
---|
| 96 | } { |
---|
| 97 | puts "/* Unicode: $desc */\n" |
---|
| 98 | genTable $type |
---|
| 99 | } |
---|
| 100 | |
---|
| 101 | puts "/* |
---|
| 102 | * End of auto-generated Unicode character ranges declarations. |
---|
| 103 | */" |
---|