1 | #!/bin/sh |
---|
2 | # The next line is executed by /bin/sh, but not tcl \ |
---|
3 | exec tclsh "$0" ${1+"$@"} |
---|
4 | |
---|
5 | # |
---|
6 | # uniClass.tcl -- |
---|
7 | # |
---|
8 | # Generates the character ranges and singletons that are used in |
---|
9 | # generic/regc_locale.c for translation of character classes. |
---|
10 | # This file must be generated using a tclsh that contains the |
---|
11 | # correct corresponding tclUniData.c file (generated by uniParse.tcl) |
---|
12 | # in order for the class ranges to match. |
---|
13 | # |
---|
14 | |
---|
15 | proc emitRange {first last} { |
---|
16 | global ranges numranges chars numchars |
---|
17 | |
---|
18 | if {$first < ($last-1)} { |
---|
19 | append ranges [format "{0x%04x, 0x%04x}, " \ |
---|
20 | $first $last] |
---|
21 | if {[incr numranges] % 4 == 0} { |
---|
22 | append ranges "\n " |
---|
23 | } |
---|
24 | } else { |
---|
25 | append chars [format "0x%04x, " $first] |
---|
26 | incr numchars |
---|
27 | if {$numchars % 9 == 0} { |
---|
28 | append chars "\n " |
---|
29 | } |
---|
30 | if {$first != $last} { |
---|
31 | append chars [format "0x%04x, " $last] |
---|
32 | incr numchars |
---|
33 | if {$numchars % 9 == 0} { |
---|
34 | append chars "\n " |
---|
35 | } |
---|
36 | } |
---|
37 | } |
---|
38 | } |
---|
39 | |
---|
40 | proc genTable {type} { |
---|
41 | global first last ranges numranges chars numchars |
---|
42 | set first -2 |
---|
43 | set last -2 |
---|
44 | |
---|
45 | set ranges " " |
---|
46 | set numranges 0 |
---|
47 | set chars " " |
---|
48 | set numchars 0 |
---|
49 | |
---|
50 | for {set i 0} {$i <= 0xFFFF} {incr i} { |
---|
51 | if {[string is $type [format %c $i]]} { |
---|
52 | if {$i == ($last + 1)} { |
---|
53 | set last $i |
---|
54 | } else { |
---|
55 | if {$first > 0} { |
---|
56 | emitRange $first $last |
---|
57 | } |
---|
58 | set first $i |
---|
59 | set last $i |
---|
60 | } |
---|
61 | } |
---|
62 | } |
---|
63 | emitRange $first $last |
---|
64 | |
---|
65 | set ranges [string trimright $ranges "\t\n ,"] |
---|
66 | set chars [string trimright $chars "\t\n ,"] |
---|
67 | if {$ranges != ""} { |
---|
68 | puts "static crange ${type}RangeTable\[\] = {\n$ranges\n};\n" |
---|
69 | puts "#define NUM_[string toupper $type]_RANGE (sizeof(${type}RangeTable)/sizeof(crange))\n" |
---|
70 | } else { |
---|
71 | puts "/* no contiguous ranges of $type characters */\n" |
---|
72 | } |
---|
73 | if {$chars != ""} { |
---|
74 | puts "static chr ${type}CharTable\[\] = {\n$chars\n};\n" |
---|
75 | puts "#define NUM_[string toupper $type]_CHAR (sizeof(${type}CharTable)/sizeof(chr))\n" |
---|
76 | } else { |
---|
77 | puts "/* no singletons of $type characters */\n" |
---|
78 | } |
---|
79 | } |
---|
80 | |
---|
81 | puts "/* |
---|
82 | * Declarations of Unicode character ranges. This code |
---|
83 | * is automatically generated by the tools/uniClass.tcl script |
---|
84 | * and used in generic/regc_locale.c. Do not modify by hand. |
---|
85 | */ |
---|
86 | " |
---|
87 | |
---|
88 | foreach {type desc} { |
---|
89 | alpha "alphabetic characters" |
---|
90 | digit "decimal digit characters" |
---|
91 | punct "punctuation characters" |
---|
92 | space "white space characters" |
---|
93 | lower "lowercase characters" |
---|
94 | upper "uppercase characters" |
---|
95 | graph "unicode print characters excluding space" |
---|
96 | } { |
---|
97 | puts "/* Unicode: $desc */\n" |
---|
98 | genTable $type |
---|
99 | } |
---|
100 | |
---|
101 | puts "/* |
---|
102 | * End of auto-generated Unicode character ranges declarations. |
---|
103 | */" |
---|