| 1 | /* |
|---|
| 2 | * |
|---|
| 3 | * Copyright (c) 2004 |
|---|
| 4 | * John Maddock |
|---|
| 5 | * |
|---|
| 6 | * Use, modification and distribution are subject to the |
|---|
| 7 | * Boost Software License, Version 1.0. (See accompanying file |
|---|
| 8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
|---|
| 9 | * |
|---|
| 10 | */ |
|---|
| 11 | |
|---|
| 12 | /* |
|---|
| 13 | * LOCATION: see http://www.boost.org for most recent version. |
|---|
| 14 | * FILE: wc_regex_traits.cpp |
|---|
| 15 | * VERSION: see <boost/version.hpp> |
|---|
| 16 | * DESCRIPTION: Implements out of line members for c_regex_traits<wchar_t> |
|---|
| 17 | */ |
|---|
| 18 | |
|---|
| 19 | |
|---|
| 20 | #define BOOST_REGEX_SOURCE |
|---|
| 21 | |
|---|
| 22 | #include <boost/config.hpp> |
|---|
| 23 | #include <boost/detail/workaround.hpp> |
|---|
| 24 | |
|---|
| 25 | #if !BOOST_WORKAROUND(__BORLANDC__, < 0x560) |
|---|
| 26 | |
|---|
| 27 | #include <boost/regex/v4/c_regex_traits.hpp> |
|---|
| 28 | #ifndef BOOST_NO_WREGEX |
|---|
| 29 | #include <boost/regex/v4/primary_transform.hpp> |
|---|
| 30 | #include <boost/regex/v4/regex_traits_defaults.hpp> |
|---|
| 31 | |
|---|
| 32 | #if defined(BOOST_NO_STDC_NAMESPACE) |
|---|
| 33 | namespace std{ |
|---|
| 34 | using ::wcstol; |
|---|
| 35 | } |
|---|
| 36 | #endif |
|---|
| 37 | |
|---|
| 38 | namespace boost{ |
|---|
| 39 | |
|---|
| 40 | c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform(const wchar_t* p1, const wchar_t* p2) |
|---|
| 41 | { |
|---|
| 42 | std::size_t r; |
|---|
| 43 | std::size_t s = 10; |
|---|
| 44 | std::wstring src(p1, p2); |
|---|
| 45 | std::wstring result(s, L' '); |
|---|
| 46 | while(s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s))) |
|---|
| 47 | { |
|---|
| 48 | result.append(r - s + 3, L' '); |
|---|
| 49 | s = result.size(); |
|---|
| 50 | } |
|---|
| 51 | result.erase(r); |
|---|
| 52 | return result; |
|---|
| 53 | } |
|---|
| 54 | |
|---|
| 55 | c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform_primary(const wchar_t* p1, const wchar_t* p2) |
|---|
| 56 | { |
|---|
| 57 | static wchar_t s_delim; |
|---|
| 58 | static const int s_collate_type = ::boost::re_detail::find_sort_syntax(static_cast<const c_regex_traits<wchar_t>*>(0), &s_delim); |
|---|
| 59 | std::wstring result; |
|---|
| 60 | // |
|---|
| 61 | // What we do here depends upon the format of the sort key returned by |
|---|
| 62 | // sort key returned by this->transform: |
|---|
| 63 | // |
|---|
| 64 | switch(s_collate_type) |
|---|
| 65 | { |
|---|
| 66 | case ::boost::re_detail::sort_C: |
|---|
| 67 | case ::boost::re_detail::sort_unknown: |
|---|
| 68 | // the best we can do is translate to lower case, then get a regular sort key: |
|---|
| 69 | { |
|---|
| 70 | result.assign(p1, p2); |
|---|
| 71 | for(std::wstring::size_type i = 0; i < result.size(); ++i) |
|---|
| 72 | result[i] = (std::towlower)(result[i]); |
|---|
| 73 | result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size()); |
|---|
| 74 | break; |
|---|
| 75 | } |
|---|
| 76 | case ::boost::re_detail::sort_fixed: |
|---|
| 77 | { |
|---|
| 78 | // get a regular sort key, and then truncate it: |
|---|
| 79 | result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size()); |
|---|
| 80 | result.erase(s_delim); |
|---|
| 81 | break; |
|---|
| 82 | } |
|---|
| 83 | case ::boost::re_detail::sort_delim: |
|---|
| 84 | // get a regular sort key, and then truncate everything after the delim: |
|---|
| 85 | result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size()); |
|---|
| 86 | if(result.size() && (result[0] == s_delim)) |
|---|
| 87 | break; |
|---|
| 88 | std::size_t i; |
|---|
| 89 | for(i = 0; i < result.size(); ++i) |
|---|
| 90 | { |
|---|
| 91 | if(result[i] == s_delim) |
|---|
| 92 | break; |
|---|
| 93 | } |
|---|
| 94 | result.erase(i); |
|---|
| 95 | break; |
|---|
| 96 | } |
|---|
| 97 | if(result.empty()) |
|---|
| 98 | result = std::wstring(1, char(0)); |
|---|
| 99 | return result; |
|---|
| 100 | } |
|---|
| 101 | |
|---|
//
// Bit flags used to build character class masks.  Each basic class owns
// one bit; alnum and graph are unions of other classes.
//
enum
{
   char_class_space   = 0x0001,
   char_class_print   = 0x0002,
   char_class_cntrl   = 0x0004,
   char_class_upper   = 0x0008,
   char_class_lower   = 0x0010,
   char_class_alpha   = 0x0020,
   char_class_digit   = 0x0040,
   char_class_punct   = 0x0080,
   char_class_xdigit  = 0x0100,
   // composite classes:
   char_class_alnum   = char_class_alpha | char_class_digit,
   char_class_graph   = char_class_alnum | char_class_punct,
   // classes that are tested by hand in isctype:
   char_class_blank   = 0x0200,
   char_class_word    = 0x0400,
   char_class_unicode = 0x0800
};
|---|
| 119 | |
|---|
| 120 | c_regex_traits<wchar_t>::char_class_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_classname(const wchar_t* p1, const wchar_t* p2) |
|---|
| 121 | { |
|---|
| 122 | static const char_class_type masks[] = |
|---|
| 123 | { |
|---|
| 124 | 0, |
|---|
| 125 | char_class_alnum, |
|---|
| 126 | char_class_alpha, |
|---|
| 127 | char_class_blank, |
|---|
| 128 | char_class_cntrl, |
|---|
| 129 | char_class_digit, |
|---|
| 130 | char_class_digit, |
|---|
| 131 | char_class_graph, |
|---|
| 132 | char_class_lower, |
|---|
| 133 | char_class_lower, |
|---|
| 134 | char_class_print, |
|---|
| 135 | char_class_punct, |
|---|
| 136 | char_class_space, |
|---|
| 137 | char_class_space, |
|---|
| 138 | char_class_upper, |
|---|
| 139 | char_class_unicode, |
|---|
| 140 | char_class_upper, |
|---|
| 141 | char_class_alnum | char_class_word, |
|---|
| 142 | char_class_alnum | char_class_word, |
|---|
| 143 | char_class_xdigit, |
|---|
| 144 | }; |
|---|
| 145 | |
|---|
| 146 | int id = ::boost::re_detail::get_default_class_id(p1, p2); |
|---|
| 147 | if(id < 0) |
|---|
| 148 | { |
|---|
| 149 | std::wstring s(p1, p2); |
|---|
| 150 | for(std::wstring::size_type i = 0; i < s.size(); ++i) |
|---|
| 151 | s[i] = (std::towlower)(s[i]); |
|---|
| 152 | id = ::boost::re_detail::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); |
|---|
| 153 | } |
|---|
| 154 | BOOST_ASSERT(id+1 < static_cast<int>(sizeof(masks) / sizeof(masks[0]))); |
|---|
| 155 | return masks[id+1]; |
|---|
| 156 | } |
|---|
| 157 | |
|---|
| 158 | bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::isctype(wchar_t c, char_class_type mask) |
|---|
| 159 | { |
|---|
| 160 | return |
|---|
| 161 | ((mask & char_class_space) && (std::iswspace)(c)) |
|---|
| 162 | || ((mask & char_class_print) && (std::iswprint)(c)) |
|---|
| 163 | || ((mask & char_class_cntrl) && (std::iswcntrl)(c)) |
|---|
| 164 | || ((mask & char_class_upper) && (std::iswupper)(c)) |
|---|
| 165 | || ((mask & char_class_lower) && (std::iswlower)(c)) |
|---|
| 166 | || ((mask & char_class_alpha) && (std::iswalpha)(c)) |
|---|
| 167 | || ((mask & char_class_digit) && (std::iswdigit)(c)) |
|---|
| 168 | || ((mask & char_class_punct) && (std::iswpunct)(c)) |
|---|
| 169 | || ((mask & char_class_xdigit) && (std::iswxdigit)(c)) |
|---|
| 170 | || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::re_detail::is_separator(c)) |
|---|
| 171 | || ((mask & char_class_word) && (c == '_')) |
|---|
| 172 | || ((mask & char_class_unicode) && (c & ~static_cast<wchar_t>(0xff))); |
|---|
| 173 | } |
|---|
| 174 | |
|---|
| 175 | c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_collatename(const wchar_t* p1, const wchar_t* p2) |
|---|
| 176 | { |
|---|
| 177 | #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ |
|---|
| 178 | && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ |
|---|
| 179 | && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) |
|---|
| 180 | std::string name(p1, p2); |
|---|
| 181 | #else |
|---|
| 182 | std::string name; |
|---|
| 183 | const wchar_t* p0 = p1; |
|---|
| 184 | while(p0 != p2) |
|---|
| 185 | name.append(1, char(*p0++)); |
|---|
| 186 | #endif |
|---|
| 187 | name = ::boost::re_detail::lookup_default_collate_name(name); |
|---|
| 188 | #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ |
|---|
| 189 | && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ |
|---|
| 190 | && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) |
|---|
| 191 | if(name.size()) |
|---|
| 192 | return string_type(name.begin(), name.end()); |
|---|
| 193 | #else |
|---|
| 194 | if(name.size()) |
|---|
| 195 | { |
|---|
| 196 | string_type result; |
|---|
| 197 | typedef std::string::const_iterator iter; |
|---|
| 198 | iter b = name.begin(); |
|---|
| 199 | iter e = name.end(); |
|---|
| 200 | while(b != e) |
|---|
| 201 | result.append(1, wchar_t(*b++)); |
|---|
| 202 | return result; |
|---|
| 203 | } |
|---|
| 204 | #endif |
|---|
| 205 | if(p2 - p1 == 1) |
|---|
| 206 | return string_type(1, *p1); |
|---|
| 207 | return string_type(); |
|---|
| 208 | } |
|---|
| 209 | |
|---|
| 210 | int BOOST_REGEX_CALL c_regex_traits<wchar_t>::value(wchar_t c, int radix) |
|---|
| 211 | { |
|---|
| 212 | #ifdef __BORLANDC__ |
|---|
| 213 | // workaround for broken wcstol: |
|---|
| 214 | if((std::iswxdigit)(c) == 0) |
|---|
| 215 | return -1; |
|---|
| 216 | #endif |
|---|
| 217 | wchar_t b[2] = { c, '\0', }; |
|---|
| 218 | wchar_t* ep; |
|---|
| 219 | int result = std::wcstol(b, &ep, radix); |
|---|
| 220 | if(ep == b) |
|---|
| 221 | return -1; |
|---|
| 222 | return result; |
|---|
| 223 | } |
|---|
| 224 | |
|---|
| 225 | #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T |
|---|
| 226 | c_regex_traits<unsigned short>::string_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::transform(const unsigned short* p1, const unsigned short* p2) |
|---|
| 227 | { |
|---|
| 228 | std::wstring result = c_regex_traits<wchar_t>::transform((const wchar_t*)p1, (const wchar_t*)p2); |
|---|
| 229 | return string_type(result.begin(), result.end()); |
|---|
| 230 | } |
|---|
| 231 | |
|---|
| 232 | c_regex_traits<unsigned short>::string_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::transform_primary(const unsigned short* p1, const unsigned short* p2) |
|---|
| 233 | { |
|---|
| 234 | std::wstring result = c_regex_traits<wchar_t>::transform_primary((const wchar_t*)p1, (const wchar_t*)p2); |
|---|
| 235 | return string_type(result.begin(), result.end()); |
|---|
| 236 | } |
|---|
| 237 | |
|---|
| 238 | c_regex_traits<unsigned short>::char_class_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::lookup_classname(const unsigned short* p1, const unsigned short* p2) |
|---|
| 239 | { |
|---|
| 240 | return c_regex_traits<wchar_t>::lookup_classname((const wchar_t*)p1, (const wchar_t*)p2); |
|---|
| 241 | } |
|---|
| 242 | |
|---|
| 243 | c_regex_traits<unsigned short>::string_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::lookup_collatename(const unsigned short* p1, const unsigned short* p2) |
|---|
| 244 | { |
|---|
| 245 | std::wstring result = c_regex_traits<wchar_t>::lookup_collatename((const wchar_t*)p1, (const wchar_t*)p2); |
|---|
| 246 | return string_type(result.begin(), result.end()); |
|---|
| 247 | } |
|---|
| 248 | |
|---|
| 249 | bool BOOST_REGEX_CALL c_regex_traits<unsigned short>::isctype(unsigned short c, char_class_type m) |
|---|
| 250 | { |
|---|
| 251 | return c_regex_traits<wchar_t>::isctype(c, m); |
|---|
| 252 | } |
|---|
| 253 | |
|---|
| 254 | int BOOST_REGEX_CALL c_regex_traits<unsigned short>::value(unsigned short c, int radix) |
|---|
| 255 | { |
|---|
| 256 | return c_regex_traits<wchar_t>::value(c, radix); |
|---|
| 257 | } |
|---|
| 258 | |
|---|
| 259 | #endif |
|---|
| 260 | |
|---|
| 261 | } |
|---|
| 262 | |
|---|
| 263 | #endif // BOOST_NO_WREGEX |
|---|
| 264 | |
|---|
| 265 | #endif // __BORLANDC__ |
|---|
| 266 | |
|---|