| [25] | 1 | # word.tcl -- |
|---|
| 2 | # |
|---|
| 3 | # This file defines various procedures for computing word boundaries in |
|---|
| 4 | # strings. This file is primarily needed so Tk text and entry widgets behave |
|---|
| 5 | # properly for different platforms. |
|---|
| 6 | # |
|---|
| 7 | # Copyright (c) 1996 by Sun Microsystems, Inc. |
|---|
| 8 | # Copyright (c) 1998 by Scritpics Corporation. |
|---|
| 9 | # |
|---|
| 10 | # See the file "license.terms" for information on usage and redistribution of |
|---|
| 11 | # this file, and for a DISCLAIMER OF ALL WARRANTIES. |
|---|
| 12 | # |
|---|
| 13 | # RCS: @(#) $Id: word.tcl,v 1.10 2007/12/13 15:26:03 dgp Exp $ |
|---|
| 14 | |
|---|
| 15 | # The following variables are used to determine which characters are |
|---|
| 16 | # interpreted as white space. |
|---|
| 17 | |
|---|
| 18 | if {$::tcl_platform(platform) eq "windows"} { |
|---|
| 19 | # Windows style - any but a unicode space char |
|---|
| 20 | set ::tcl_wordchars {\S} |
|---|
| 21 | set ::tcl_nonwordchars {\s} |
|---|
| 22 | } else { |
|---|
| 23 | # Motif style - any unicode word char (number, letter, or underscore) |
|---|
| 24 | set ::tcl_wordchars {\w} |
|---|
| 25 | set ::tcl_nonwordchars {\W} |
|---|
| 26 | } |
|---|
| 27 | |
|---|
| 28 | # Arrange for caches of the real matcher REs to be kept, which enables the REs |
|---|
| 29 | # themselves to be cached for greater performance (and somewhat greater |
|---|
| 30 | # clarity too). |
|---|
| 31 | |
|---|
| 32 | namespace eval ::tcl { |
|---|
| 33 | variable WordBreakRE |
|---|
| 34 | array set WordBreakRE {} |
|---|
| 35 | |
|---|
| 36 | proc UpdateWordBreakREs args { |
|---|
| 37 | # Ignores the arguments |
|---|
| 38 | global tcl_wordchars tcl_nonwordchars |
|---|
| 39 | variable WordBreakRE |
|---|
| 40 | |
|---|
| 41 | # To keep the RE strings short... |
|---|
| 42 | set letter $tcl_wordchars |
|---|
| 43 | set space $tcl_nonwordchars |
|---|
| 44 | |
|---|
| 45 | set WordBreakRE(after) "$letter$space|$space$letter" |
|---|
| 46 | set WordBreakRE(before) "^.*($letter$space|$space$letter)" |
|---|
| 47 | set WordBreakRE(end) "$space*$letter+$space" |
|---|
| 48 | set WordBreakRE(next) "$letter*$space+$letter" |
|---|
| 49 | set WordBreakRE(previous) "$space*($letter+)$space*\$" |
|---|
| 50 | } |
|---|
| 51 | |
|---|
| 52 | # Initialize the cache |
|---|
| 53 | UpdateWordBreakREs |
|---|
| 54 | trace add variable ::tcl_wordchars write ::tcl::UpdateWordBreakREs |
|---|
| 55 | trace add variable ::tcl_nonwordchars write ::tcl::UpdateWordBreakREs |
|---|
| 56 | } |
|---|
| 57 | |
|---|
| 58 | # tcl_wordBreakAfter -- |
|---|
| 59 | # |
|---|
| 60 | # This procedure returns the index of the first word boundary after the |
|---|
| 61 | # starting point in the given string, or -1 if there are no more boundaries in |
|---|
| 62 | # the given string. The index returned refers to the first character of the |
|---|
| 63 | # pair that comprises a boundary. |
|---|
| 64 | # |
|---|
| 65 | # Arguments: |
|---|
| 66 | # str - String to search. |
|---|
| 67 | # start - Index into string specifying starting point. |
|---|
| 68 | |
|---|
| 69 | proc tcl_wordBreakAfter {str start} { |
|---|
| 70 | variable ::tcl::WordBreakRE |
|---|
| 71 | set result {-1 -1} |
|---|
| 72 | regexp -indices -start $start $WordBreakRE(after) $str result |
|---|
| 73 | return [lindex $result 1] |
|---|
| 74 | } |
|---|
| 75 | |
|---|
| 76 | # tcl_wordBreakBefore -- |
|---|
| 77 | # |
|---|
| 78 | # This procedure returns the index of the first word boundary before the |
|---|
| 79 | # starting point in the given string, or -1 if there are no more boundaries in |
|---|
| 80 | # the given string. The index returned refers to the second character of the |
|---|
| 81 | # pair that comprises a boundary. |
|---|
| 82 | # |
|---|
| 83 | # Arguments: |
|---|
| 84 | # str - String to search. |
|---|
| 85 | # start - Index into string specifying starting point. |
|---|
| 86 | |
|---|
| 87 | proc tcl_wordBreakBefore {str start} { |
|---|
| 88 | variable ::tcl::WordBreakRE |
|---|
| 89 | set result {-1 -1} |
|---|
| 90 | regexp -indices $WordBreakRE(before) [string range $str 0 $start] result |
|---|
| 91 | return [lindex $result 1] |
|---|
| 92 | } |
|---|
| 93 | |
|---|
| 94 | # tcl_endOfWord -- |
|---|
| 95 | # |
|---|
| 96 | # This procedure returns the index of the first end-of-word location after a |
|---|
| 97 | # starting index in the given string. An end-of-word location is defined to be |
|---|
| 98 | # the first whitespace character following the first non-whitespace character |
|---|
| 99 | # after the starting point. Returns -1 if there are no more words after the |
|---|
| 100 | # starting point. |
|---|
| 101 | # |
|---|
| 102 | # Arguments: |
|---|
| 103 | # str - String to search. |
|---|
| 104 | # start - Index into string specifying starting point. |
|---|
| 105 | |
|---|
| 106 | proc tcl_endOfWord {str start} { |
|---|
| 107 | variable ::tcl::WordBreakRE |
|---|
| 108 | set result {-1 -1} |
|---|
| 109 | regexp -indices -start $start $WordBreakRE(end) $str result |
|---|
| 110 | return [lindex $result 1] |
|---|
| 111 | } |
|---|
| 112 | |
|---|
| 113 | # tcl_startOfNextWord -- |
|---|
| 114 | # |
|---|
| 115 | # This procedure returns the index of the first start-of-word location after a |
|---|
| 116 | # starting index in the given string. A start-of-word location is defined to |
|---|
| 117 | # be a non-whitespace character following a whitespace character. Returns -1 |
|---|
| 118 | # if there are no more start-of-word locations after the starting point. |
|---|
| 119 | # |
|---|
| 120 | # Arguments: |
|---|
| 121 | # str - String to search. |
|---|
| 122 | # start - Index into string specifying starting point. |
|---|
| 123 | |
|---|
| 124 | proc tcl_startOfNextWord {str start} { |
|---|
| 125 | variable ::tcl::WordBreakRE |
|---|
| 126 | set result {-1 -1} |
|---|
| 127 | regexp -indices -start $start $WordBreakRE(next) $str result |
|---|
| 128 | return [lindex $result 1] |
|---|
| 129 | } |
|---|
| 130 | |
|---|
| 131 | # tcl_startOfPreviousWord -- |
|---|
| 132 | # |
|---|
| 133 | # This procedure returns the index of the first start-of-word location before |
|---|
| 134 | # a starting index in the given string. |
|---|
| 135 | # |
|---|
| 136 | # Arguments: |
|---|
| 137 | # str - String to search. |
|---|
| 138 | # start - Index into string specifying starting point. |
|---|
| 139 | |
|---|
| 140 | proc tcl_startOfPreviousWord {str start} { |
|---|
| 141 | variable ::tcl::WordBreakRE |
|---|
| 142 | set word {-1 -1} |
|---|
| 143 | regexp -indices $WordBreakRE(previous) [string range $str 0 $start-1] \ |
|---|
| 144 | result word |
|---|
| 145 | return [lindex $word 0] |
|---|
| 146 | } |
|---|