1 | /////////////////////////////////////////////////////////////////////////////// |
---|
2 | /// \file cpp_regex_traits.hpp |
---|
3 | /// Contains the definition of the cpp_regex_traits\<\> template, which is a |
---|
4 | /// wrapper for std::locale that can be used to customize the behavior of |
---|
5 | /// static and dynamic regexes. |
---|
6 | // |
---|
7 | // Copyright 2004 Eric Niebler. Distributed under the Boost |
---|
8 | // Software License, Version 1.0. (See accompanying file |
---|
9 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
---|
10 | |
---|
11 | #ifndef BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005 |
---|
12 | #define BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005 |
---|
13 | |
---|
14 | // MS compatible compilers support #pragma once |
---|
15 | #if defined(_MSC_VER) && (_MSC_VER >= 1020) |
---|
16 | # pragma once |
---|
17 | #endif |
---|
18 | |
---|
19 | #include <string> |
---|
20 | #include <locale> |
---|
21 | #include <sstream> |
---|
22 | #include <boost/config.hpp> |
---|
23 | #include <boost/assert.hpp> |
---|
24 | #include <boost/integer.hpp> |
---|
25 | #include <boost/mpl/assert.hpp> |
---|
26 | #include <boost/type_traits/is_same.hpp> |
---|
27 | #include <boost/xpressive/detail/detail_fwd.hpp> |
---|
28 | #include <boost/xpressive/detail/utility/literals.hpp> |
---|
29 | |
---|
30 | // From John Maddock: |
---|
31 | // Fix for gcc prior to 3.4: std::ctype<wchar_t> doesn't allow masks to be combined, for example: |
---|
32 | // std::use_facet<std::ctype<wchar_t> >(locale()).is(std::ctype_base::lower|std::ctype_base::upper, L'a'); |
---|
33 | // incorrectly returns false. |
---|
34 | // NOTE: later versions of gcc define __GLIBCXX__, not __GLIBCPP__. When the test below matches, BOOST_XPRESSIVE_BUGGY_CTYPE_FACET switches this header to code that queries the ctype facet with one mask at a time. |
---|
35 | #if BOOST_WORKAROUND(__GLIBCPP__, != 0) |
---|
36 | # define BOOST_XPRESSIVE_BUGGY_CTYPE_FACET |
---|
37 | #endif |
---|
38 | |
---|
39 | namespace boost { namespace xpressive |
---|
40 | { |
---|
41 | |
---|
42 | namespace detail |
---|
43 | { |
---|
44 | // define an unsigned integral typedef of the same size as std::ctype_base::mask |
---|
45 | typedef boost::uint_t<sizeof(std::ctype_base::mask) * CHAR_BIT>::least umask_t; |
---|
46 | BOOST_MPL_ASSERT_RELATION(sizeof(std::ctype_base::mask), ==, sizeof(umask_t)); |
---|
47 | |
---|
48 | // Calculate what the size of the umaskex_t type should be to fix the 3 extra bitmasks |
---|
49 | // 11 char categories in ctype_base |
---|
50 | // + 3 extra categories for xpressive |
---|
51 | // = 14 total bits needed |
---|
52 | int const umaskex_bits = (14 > (sizeof(umask_t) * CHAR_BIT)) ? 14 : sizeof(umask_t) * CHAR_BIT; |
---|
53 | |
---|
54 | // define an unsigned integral type with at least umaskex_bits |
---|
55 | typedef boost::uint_t<umaskex_bits>::fast umaskex_t; |
---|
56 | BOOST_MPL_ASSERT_RELATION(sizeof(umask_t), <=, sizeof(umaskex_t)); |
---|
57 | |
---|
58 | // cast a ctype mask to a umaskex_t |
---|
59 | template<std::ctype_base::mask Mask> |
---|
60 | struct mask_cast |
---|
61 | { |
---|
62 | BOOST_STATIC_CONSTANT(umaskex_t, value = static_cast<umask_t>(Mask)); |
---|
63 | }; |
---|
64 | |
---|
65 | #ifdef __CYGWIN__ |
---|
66 | // Work around a gcc warning on cygwin |
---|
67 | template<> |
---|
68 | struct mask_cast<std::ctype_base::print> |
---|
69 | { |
---|
70 | BOOST_MPL_ASSERT_RELATION('\227', ==, std::ctype_base::print); |
---|
71 | BOOST_STATIC_CONSTANT(umaskex_t, value = 0227); |
---|
72 | }; |
---|
73 | #endif |
---|
74 | |
---|
75 | #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION |
---|
76 | template<std::ctype_base::mask Mask> |
---|
77 | umaskex_t const mask_cast<Mask>::value; |
---|
78 | #endif |
---|
79 | |
---|
80 | #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET |
---|
81 | // an unsigned integer with the highest bit set |
---|
82 | umaskex_t const highest_bit = 1 << (sizeof(umaskex_t) * CHAR_BIT - 1); |
---|
83 | |
---|
84 | /////////////////////////////////////////////////////////////////////////////// |
---|
85 | // unused_mask |
---|
86 | // find a bit in an int that isn't set |
---|
87 | template<umaskex_t In, umaskex_t Out = highest_bit, bool Done = (0 == (Out & In))> |
---|
88 | struct unused_mask |
---|
89 | { |
---|
90 | BOOST_MPL_ASSERT_RELATION(1, !=, Out); |
---|
91 | BOOST_STATIC_CONSTANT(umaskex_t, value = (unused_mask<In, (Out >> 1)>::value)); |
---|
92 | }; |
---|
93 | |
---|
94 | template<umaskex_t In, umaskex_t Out> |
---|
95 | struct unused_mask<In, Out, true> |
---|
96 | { |
---|
97 | BOOST_STATIC_CONSTANT(umaskex_t, value = Out); |
---|
98 | }; |
---|
99 | |
---|
100 | #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION |
---|
101 | template<umaskex_t In, umaskex_t Out, bool Done> |
---|
102 | umaskex_t const unused_mask<In, Out, Done>::value; |
---|
103 | #endif |
---|
104 | |
---|
105 | umaskex_t const std_ctype_alnum = mask_cast<std::ctype_base::alnum>::value; |
---|
106 | umaskex_t const std_ctype_alpha = mask_cast<std::ctype_base::alpha>::value; |
---|
107 | umaskex_t const std_ctype_cntrl = mask_cast<std::ctype_base::cntrl>::value; |
---|
108 | umaskex_t const std_ctype_digit = mask_cast<std::ctype_base::digit>::value; |
---|
109 | umaskex_t const std_ctype_graph = mask_cast<std::ctype_base::graph>::value; |
---|
110 | umaskex_t const std_ctype_lower = mask_cast<std::ctype_base::lower>::value; |
---|
111 | umaskex_t const std_ctype_print = mask_cast<std::ctype_base::print>::value; |
---|
112 | umaskex_t const std_ctype_punct = mask_cast<std::ctype_base::punct>::value; |
---|
113 | umaskex_t const std_ctype_space = mask_cast<std::ctype_base::space>::value; |
---|
114 | umaskex_t const std_ctype_upper = mask_cast<std::ctype_base::upper>::value; |
---|
115 | umaskex_t const std_ctype_xdigit = mask_cast<std::ctype_base::xdigit>::value; |
---|
116 | |
---|
117 | // Reserve some bits for the implementation |
---|
118 | #if defined(__GLIBCXX__) && __GLIBCXX__ >= 20050209 |
---|
119 | umaskex_t const std_ctype_reserved = 0x8000; |
---|
120 | #else |
---|
121 | umaskex_t const std_ctype_reserved = 0; |
---|
122 | #endif |
---|
123 | |
---|
124 | // Bitwise-or all the ctype masks together |
---|
125 | umaskex_t const all_ctype_masks = std_ctype_reserved |
---|
126 | | std_ctype_alnum | std_ctype_alpha | std_ctype_cntrl | std_ctype_digit |
---|
127 | | std_ctype_graph | std_ctype_lower | std_ctype_print | std_ctype_punct |
---|
128 | | std_ctype_space | std_ctype_upper | std_ctype_xdigit; |
---|
129 | |
---|
130 | // define a new mask for "underscore" ("word" == alnum | underscore) |
---|
131 | umaskex_t const non_std_ctype_underscore = unused_mask<all_ctype_masks>::value; |
---|
132 | |
---|
133 | // define a new mask for "blank" |
---|
134 | umaskex_t const non_std_ctype_blank = unused_mask<all_ctype_masks | non_std_ctype_underscore>::value; |
---|
135 | |
---|
136 | // define a new mask for "newline" |
---|
137 | umaskex_t const non_std_ctype_newline = unused_mask<all_ctype_masks | non_std_ctype_underscore | non_std_ctype_blank>::value; |
---|
138 | |
---|
139 | #else |
---|
140 | /////////////////////////////////////////////////////////////////////////////// |
---|
141 | // Ugly work-around for buggy ctype facets. |
---|
142 | umaskex_t const std_ctype_alnum = 1 << 0; |
---|
143 | umaskex_t const std_ctype_alpha = 1 << 1; |
---|
144 | umaskex_t const std_ctype_cntrl = 1 << 2; |
---|
145 | umaskex_t const std_ctype_digit = 1 << 3; |
---|
146 | umaskex_t const std_ctype_graph = 1 << 4; |
---|
147 | umaskex_t const std_ctype_lower = 1 << 5; |
---|
148 | umaskex_t const std_ctype_print = 1 << 6; |
---|
149 | umaskex_t const std_ctype_punct = 1 << 7; |
---|
150 | umaskex_t const std_ctype_space = 1 << 8; |
---|
151 | umaskex_t const std_ctype_upper = 1 << 9; |
---|
152 | umaskex_t const std_ctype_xdigit = 1 << 10; |
---|
153 | umaskex_t const non_std_ctype_underscore = 1 << 11; |
---|
154 | umaskex_t const non_std_ctype_blank = 1 << 12; |
---|
155 | umaskex_t const non_std_ctype_newline = 1 << 13; |
---|
156 | |
---|
157 | static umaskex_t const std_masks[] = |
---|
158 | { |
---|
159 | mask_cast<std::ctype_base::alnum>::value |
---|
160 | , mask_cast<std::ctype_base::alpha>::value |
---|
161 | , mask_cast<std::ctype_base::cntrl>::value |
---|
162 | , mask_cast<std::ctype_base::digit>::value |
---|
163 | , mask_cast<std::ctype_base::graph>::value |
---|
164 | , mask_cast<std::ctype_base::lower>::value |
---|
165 | , mask_cast<std::ctype_base::print>::value |
---|
166 | , mask_cast<std::ctype_base::punct>::value |
---|
167 | , mask_cast<std::ctype_base::space>::value |
---|
168 | , mask_cast<std::ctype_base::upper>::value |
---|
169 | , mask_cast<std::ctype_base::xdigit>::value |
---|
170 | }; |
---|
171 | |
---|
172 | inline int mylog2(umaskex_t i) |
---|
173 | { |
---|
174 | return "\0\0\1\0\2\0\0\0\3"[i & 0xf] |
---|
175 | + "\0\4\5\0\6\0\0\0\7"[(i & 0xf0) >> 04] |
---|
176 | + "\0\10\11\0\12\0\0\0\13"[(i & 0xf00) >> 010]; |
---|
177 | } |
---|
178 | #endif |
---|
179 | |
---|
180 | // convenient constant for the extra masks |
---|
181 | umaskex_t const non_std_ctype_masks = non_std_ctype_underscore | non_std_ctype_blank | non_std_ctype_newline; |
---|
182 | |
---|
183 | /////////////////////////////////////////////////////////////////////////////// |
---|
184 | // cpp_regex_traits_base |
---|
185 | // BUGBUG this should be replaced with a regex facet that lets you query for |
---|
186 | // an array of underscore characters and an array of line separator characters. |
---|
187 | template<typename Char, std::size_t SizeOfChar = sizeof(Char)> |
---|
188 | struct cpp_regex_traits_base |
---|
189 | { |
---|
190 | protected: |
---|
191 | void imbue(std::locale const &) |
---|
192 | { |
---|
193 | } |
---|
194 | |
---|
195 | static bool is(std::ctype<Char> const &ct, Char ch, umaskex_t mask) |
---|
196 | { |
---|
197 | #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET |
---|
198 | |
---|
199 | if(ct.is((std::ctype_base::mask)(umask_t)mask, ch)) |
---|
200 | { |
---|
201 | return true; |
---|
202 | } |
---|
203 | |
---|
204 | #else |
---|
205 | |
---|
206 | umaskex_t tmp = mask & ~non_std_ctype_masks; |
---|
207 | for(umaskex_t i; 0 != (i = (tmp & (~tmp+1))); tmp &= ~i) |
---|
208 | { |
---|
209 | std::ctype_base::mask m = (std::ctype_base::mask)(umask_t)std_masks[mylog2(i)]; |
---|
210 | if(ct.is(m, ch)) |
---|
211 | { |
---|
212 | return true; |
---|
213 | } |
---|
214 | } |
---|
215 | |
---|
216 | #endif |
---|
217 | |
---|
218 | return ((mask & non_std_ctype_blank) && cpp_regex_traits_base::is_blank(ch)) |
---|
219 | || ((mask & non_std_ctype_underscore) && cpp_regex_traits_base::is_underscore(ch)) |
---|
220 | || ((mask & non_std_ctype_newline) && cpp_regex_traits_base::is_newline(ch)); |
---|
221 | } |
---|
222 | |
---|
223 | private: |
---|
224 | static bool is_blank(Char ch) |
---|
225 | { |
---|
226 | BOOST_MPL_ASSERT_RELATION('\t', ==, L'\t'); |
---|
227 | return L'\t' == ch; |
---|
228 | } |
---|
229 | |
---|
230 | static bool is_underscore(Char ch) |
---|
231 | { |
---|
232 | BOOST_MPL_ASSERT_RELATION('_', ==, L'_'); |
---|
233 | return L'_' == ch; |
---|
234 | } |
---|
235 | |
---|
236 | static bool is_newline(Char ch) |
---|
237 | { |
---|
238 | BOOST_MPL_ASSERT_RELATION('\r', ==, L'\r'); |
---|
239 | BOOST_MPL_ASSERT_RELATION('\n', ==, L'\n'); |
---|
240 | BOOST_MPL_ASSERT_RELATION('\f', ==, L'\f'); |
---|
241 | return L'\r' == ch || L'\n' == ch || L'\f' == ch |
---|
242 | || (1 < SizeOfChar && (0x2028u == ch || 0x2029u == ch || 0x85u == ch)); |
---|
243 | } |
---|
244 | }; |
---|
245 | |
---|
246 | #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET |
---|
247 | |
---|
248 | template<typename Char> |
---|
249 | struct cpp_regex_traits_base<Char, 1> |
---|
250 | { |
---|
251 | protected: |
---|
252 | void imbue(std::locale const &loc) |
---|
253 | { |
---|
254 | int i = 0; |
---|
255 | Char allchars[UCHAR_MAX + 1]; |
---|
256 | for(i = 0; i <= UCHAR_MAX; ++i) |
---|
257 | { |
---|
258 | allchars[i] = static_cast<Char>(i); |
---|
259 | } |
---|
260 | |
---|
261 | std::ctype<Char> const &ct = BOOST_USE_FACET(std::ctype<Char>, loc); |
---|
262 | std::ctype_base::mask tmp[UCHAR_MAX + 1]; |
---|
263 | ct.is(allchars, allchars + UCHAR_MAX + 1, tmp); |
---|
264 | for(i = 0; i <= UCHAR_MAX; ++i) |
---|
265 | { |
---|
266 | this->masks_[i] = static_cast<umask_t>(tmp[i]); |
---|
267 | BOOST_ASSERT(0 == (this->masks_[i] & non_std_ctype_masks)); |
---|
268 | } |
---|
269 | |
---|
270 | this->masks_[static_cast<unsigned char>('_')] |= non_std_ctype_underscore; |
---|
271 | this->masks_[static_cast<unsigned char>(' ')] |= non_std_ctype_blank; |
---|
272 | this->masks_[static_cast<unsigned char>('\t')] |= non_std_ctype_blank; |
---|
273 | this->masks_[static_cast<unsigned char>('\n')] |= non_std_ctype_newline; |
---|
274 | this->masks_[static_cast<unsigned char>('\r')] |= non_std_ctype_newline; |
---|
275 | this->masks_[static_cast<unsigned char>('\f')] |= non_std_ctype_newline; |
---|
276 | } |
---|
277 | |
---|
278 | bool is(std::ctype<Char> const &, Char ch, umaskex_t mask) const |
---|
279 | { |
---|
280 | return 0 != (this->masks_[static_cast<unsigned char>(ch)] & mask); |
---|
281 | } |
---|
282 | |
---|
283 | private: |
---|
284 | umaskex_t masks_[UCHAR_MAX + 1]; |
---|
285 | }; |
---|
286 | |
---|
287 | #endif |
---|
288 | |
---|
289 | template<typename Char> |
---|
290 | struct version_tag |
---|
291 | { |
---|
292 | typedef regex_traits_version_1_tag type; |
---|
293 | }; |
---|
294 | |
---|
295 | template<> |
---|
296 | struct version_tag<char> |
---|
297 | { |
---|
298 | typedef regex_traits_version_1_case_fold_tag type; |
---|
299 | }; |
---|
300 | |
---|
301 | } // namespace detail |
---|
302 | |
---|
303 | /////////////////////////////////////////////////////////////////////////////// |
---|
304 | // cpp_regex_traits |
---|
305 | // |
---|
306 | /// \brief Encapsaulates a std::locale for use by the |
---|
307 | /// basic_regex\<\> class template. |
---|
308 | template<typename Char> |
---|
309 | struct cpp_regex_traits |
---|
310 | : detail::cpp_regex_traits_base<Char> |
---|
311 | { |
---|
312 | typedef Char char_type; |
---|
313 | typedef std::basic_string<char_type> string_type; |
---|
314 | typedef std::locale locale_type; |
---|
315 | typedef detail::umaskex_t char_class_type; |
---|
316 | typedef typename detail::version_tag<Char>::type version_tag; |
---|
317 | typedef detail::cpp_regex_traits_base<Char> base_type; |
---|
318 | |
---|
319 | /// Initialize a cpp_regex_traits object to use the specified std::locale, |
---|
320 | /// or the global std::locale if none is specified. |
---|
321 | /// |
---|
322 | cpp_regex_traits(locale_type const &loc = locale_type()) |
---|
323 | : base_type() |
---|
324 | , loc_() |
---|
325 | { |
---|
326 | this->imbue(loc); |
---|
327 | } |
---|
328 | |
---|
329 | /// Checks two cpp_regex_traits objects for equality |
---|
330 | /// |
---|
331 | /// \return this->getloc() == that.getloc(). |
---|
332 | bool operator ==(cpp_regex_traits<char_type> const &that) const |
---|
333 | { |
---|
334 | return this->loc_ == that.loc_; |
---|
335 | } |
---|
336 | |
---|
337 | /// Checks two cpp_regex_traits objects for inequality |
---|
338 | /// |
---|
339 | /// \return this->getloc() != that.getloc(). |
---|
340 | bool operator !=(cpp_regex_traits<char_type> const &that) const |
---|
341 | { |
---|
342 | return this->loc_ != that.loc_; |
---|
343 | } |
---|
344 | |
---|
345 | /// Convert a char to a Char |
---|
346 | /// |
---|
347 | /// \param ch The source character. |
---|
348 | /// \return std::use_facet<std::ctype<char_type> >(this->getloc()).widen(ch). |
---|
349 | char_type widen(char ch) const |
---|
350 | { |
---|
351 | return this->ctype_->widen(ch); |
---|
352 | } |
---|
353 | |
---|
354 | /// Returns a hash value for a Char in the range [0, UCHAR_MAX] |
---|
355 | /// |
---|
356 | /// \param ch The source character. |
---|
357 | /// \return a value between 0 and UCHAR_MAX, inclusive. |
---|
358 | static unsigned char hash(char_type ch) |
---|
359 | { |
---|
360 | return static_cast<unsigned char>(std::char_traits<Char>::to_int_type(ch)); |
---|
361 | } |
---|
362 | |
---|
363 | /// No-op |
---|
364 | /// |
---|
365 | /// \param ch The source character. |
---|
366 | /// \return ch |
---|
367 | static char_type translate(char_type ch) |
---|
368 | { |
---|
369 | return ch; |
---|
370 | } |
---|
371 | |
---|
372 | /// Converts a character to lower-case using the internally-stored std::locale. |
---|
373 | /// |
---|
374 | /// \param ch The source character. |
---|
375 | /// \return std::tolower(ch, this->getloc()). |
---|
376 | char_type translate_nocase(char_type ch) const |
---|
377 | { |
---|
378 | return this->ctype_->tolower(ch); |
---|
379 | } |
---|
380 | |
---|
381 | /// Returns a string_type containing all the characters that compare equal |
---|
382 | /// disregrarding case to the one passed in. This function can only be called |
---|
383 | /// if is_convertible<version_tag*, regex_traits_version_1_case_fold_tag*>::value |
---|
384 | /// is true. |
---|
385 | /// |
---|
386 | /// \param ch The source character. |
---|
387 | /// \return string_type containing all chars which are equal to ch when disregarding |
---|
388 | /// case |
---|
389 | //typedef array<char_type, 2> fold_case_type; |
---|
390 | string_type fold_case(char_type ch) const |
---|
391 | { |
---|
392 | BOOST_MPL_ASSERT((is_same<char_type, char>)); |
---|
393 | char_type ntcs[] = { |
---|
394 | this->ctype_->tolower(ch) |
---|
395 | , this->ctype_->toupper(ch) |
---|
396 | , 0 |
---|
397 | }; |
---|
398 | if(ntcs[1] == ntcs[0]) |
---|
399 | ntcs[1] = 0; |
---|
400 | return string_type(ntcs); |
---|
401 | } |
---|
402 | |
---|
403 | /// Checks to see if a character is within a character range. |
---|
404 | /// |
---|
405 | /// \param first The bottom of the range, inclusive. |
---|
406 | /// \param last The top of the range, inclusive. |
---|
407 | /// \param ch The source character. |
---|
408 | /// \return first <= ch && ch <= last. |
---|
409 | static bool in_range(char_type first, char_type last, char_type ch) |
---|
410 | { |
---|
411 | return first <= ch && ch <= last; |
---|
412 | } |
---|
413 | |
---|
414 | /// Checks to see if a character is within a character range, irregardless of case. |
---|
415 | /// |
---|
416 | /// \param first The bottom of the range, inclusive. |
---|
417 | /// \param last The top of the range, inclusive. |
---|
418 | /// \param ch The source character. |
---|
419 | /// \return in_range(first, last, ch) || in_range(first, last, tolower(ch, this->getloc())) || |
---|
420 | /// in_range(first, last, toupper(ch, this->getloc())) |
---|
421 | /// \attention The default implementation doesn't do proper Unicode |
---|
422 | /// case folding, but this is the best we can do with the standard |
---|
423 | /// ctype facet. |
---|
424 | bool in_range_nocase(char_type first, char_type last, char_type ch) const |
---|
425 | { |
---|
426 | // NOTE: this default implementation doesn't do proper Unicode |
---|
427 | // case folding, but this is the best we can do with the standard |
---|
428 | // std::ctype facet. |
---|
429 | return this->in_range(first, last, ch) |
---|
430 | || this->in_range(first, last, this->ctype_->toupper(ch)) |
---|
431 | || this->in_range(first, last, this->ctype_->tolower(ch)); |
---|
432 | } |
---|
433 | |
---|
434 | /// INTERNAL ONLY |
---|
435 | //string_type transform(char_type const *begin, char_type const *end) const |
---|
436 | //{ |
---|
437 | // return this->collate_->transform(begin, end); |
---|
438 | //} |
---|
439 | |
---|
440 | /// Returns a sort key for the character sequence designated by the iterator range [F1, F2) |
---|
441 | /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2) |
---|
442 | /// then v.transform(G1, G2) < v.transform(H1, H2). |
---|
443 | /// |
---|
444 | /// \attention Not used in xpressive 1.0 |
---|
445 | template<typename FwdIter> |
---|
446 | string_type transform(FwdIter begin, FwdIter end) const |
---|
447 | { |
---|
448 | //string_type str(begin, end); |
---|
449 | //return this->transform(str.data(), str.data() + str.size()); |
---|
450 | |
---|
451 | BOOST_ASSERT(false); |
---|
452 | return string_type(); |
---|
453 | } |
---|
454 | |
---|
455 | /// Returns a sort key for the character sequence designated by the iterator range [F1, F2) |
---|
456 | /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2) |
---|
457 | /// when character case is not considered then |
---|
458 | /// v.transform_primary(G1, G2) < v.transform_primary(H1, H2). |
---|
459 | /// |
---|
460 | /// \attention Not used in xpressive 1.0 |
---|
461 | template<typename FwdIter> |
---|
462 | string_type transform_primary(FwdIter begin, FwdIter end) const |
---|
463 | { |
---|
464 | BOOST_ASSERT(false); // TODO implement me |
---|
465 | return string_type(); |
---|
466 | } |
---|
467 | |
---|
468 | /// Returns a sequence of characters that represents the collating element |
---|
469 | /// consisting of the character sequence designated by the iterator range [F1, F2). |
---|
470 | /// Returns an empty string if the character sequence is not a valid collating element. |
---|
471 | /// |
---|
472 | /// \attention Not used in xpressive 1.0 |
---|
473 | template<typename FwdIter> |
---|
474 | string_type lookup_collatename(FwdIter begin, FwdIter end) const |
---|
475 | { |
---|
476 | BOOST_ASSERT(false); // TODO implement me |
---|
477 | return string_type(); |
---|
478 | } |
---|
479 | |
---|
480 | /// For the character class name represented by the specified character sequence, |
---|
481 | /// return the corresponding bitmask representation. |
---|
482 | /// |
---|
483 | /// \param begin A forward iterator to the start of the character sequence representing |
---|
484 | /// the name of the character class. |
---|
485 | /// \param end The end of the character sequence. |
---|
486 | /// \param icase Specifies whether the returned bitmask should represent the case-insensitive |
---|
487 | /// version of the character class. |
---|
488 | /// \return A bitmask representing the character class. |
---|
489 | template<typename FwdIter> |
---|
490 | char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase) const |
---|
491 | { |
---|
492 | static detail::umaskex_t const icase_masks = |
---|
493 | detail::std_ctype_lower | detail::std_ctype_upper; |
---|
494 | |
---|
495 | BOOST_ASSERT(begin != end); |
---|
496 | char_class_type char_class = this->lookup_classname_impl_(begin, end); |
---|
497 | if(0 == char_class) |
---|
498 | { |
---|
499 | // convert the string to lowercase |
---|
500 | string_type classname(begin, end); |
---|
501 | for(typename string_type::size_type i = 0, len = classname.size(); i < len; ++i) |
---|
502 | { |
---|
503 | classname[i] = this->translate_nocase(classname[i]); |
---|
504 | } |
---|
505 | char_class = this->lookup_classname_impl_(classname.begin(), classname.end()); |
---|
506 | } |
---|
507 | // erase case-sensitivity if icase==true |
---|
508 | if(icase && 0 != (char_class & icase_masks)) |
---|
509 | { |
---|
510 | char_class |= icase_masks; |
---|
511 | } |
---|
512 | return char_class; |
---|
513 | } |
---|
514 | |
---|
515 | /// Tests a character against a character class bitmask. |
---|
516 | /// |
---|
517 | /// \param ch The character to test. |
---|
518 | /// \param mask The character class bitmask against which to test. |
---|
519 | /// \pre mask is a bitmask returned by lookup_classname, or is several such masks bit-or'ed |
---|
520 | /// together. |
---|
521 | /// \return true if the character is a member of any of the specified character classes, false |
---|
522 | /// otherwise. |
---|
523 | bool isctype(char_type ch, char_class_type mask) const |
---|
524 | { |
---|
525 | return this->base_type::is(*this->ctype_, ch, mask); |
---|
526 | } |
---|
527 | |
---|
528 | /// Convert a digit character into the integer it represents. |
---|
529 | /// |
---|
530 | /// \param ch The digit character. |
---|
531 | /// \param radix The radix to use for the conversion. |
---|
532 | /// \pre radix is one of 8, 10, or 16. |
---|
533 | /// \return -1 if ch is not a digit character, the integer value of the character otherwise. |
---|
534 | /// The conversion is performed by imbueing a std::stringstream with this->getloc(); |
---|
535 | /// setting the radix to one of oct, hex or dec; inserting ch into the stream; and |
---|
536 | /// extracting an int. |
---|
537 | int value(char_type ch, int radix) const |
---|
538 | { |
---|
539 | BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix); |
---|
540 | int val = -1; |
---|
541 | std::basic_stringstream<char_type> str; |
---|
542 | str.imbue(this->getloc()); |
---|
543 | str << (8 == radix ? std::oct : (16 == radix ? std::hex : std::dec)); |
---|
544 | str.put(ch); |
---|
545 | str >> val; |
---|
546 | return str.fail() ? -1 : val; |
---|
547 | } |
---|
548 | |
---|
549 | /// Imbues *this with loc |
---|
550 | /// |
---|
551 | /// \param loc A std::locale. |
---|
552 | /// \return the previous std::locale used by *this. |
---|
553 | locale_type imbue(locale_type loc) |
---|
554 | { |
---|
555 | locale_type old_loc = this->loc_; |
---|
556 | this->loc_ = loc; |
---|
557 | this->ctype_ = &BOOST_USE_FACET(std::ctype<char_type>, this->loc_); |
---|
558 | //this->collate_ = &BOOST_USE_FACET(std::collate<char_type>, this->loc_); |
---|
559 | this->base_type::imbue(this->loc_); |
---|
560 | return old_loc; |
---|
561 | } |
---|
562 | |
---|
563 | /// Returns the current std::locale used by *this. |
---|
564 | /// |
---|
565 | locale_type getloc() const |
---|
566 | { |
---|
567 | return this->loc_; |
---|
568 | } |
---|
569 | |
---|
570 | private: |
---|
571 | |
---|
572 | /////////////////////////////////////////////////////////////////////////////// |
---|
573 | // char_class_pair |
---|
574 | /// INTERNAL ONLY |
---|
575 | struct char_class_pair |
---|
576 | { |
---|
577 | char_type const *class_name_; |
---|
578 | char_class_type class_type_; |
---|
579 | }; |
---|
580 | |
---|
581 | /////////////////////////////////////////////////////////////////////////////// |
---|
582 | // char_class |
---|
583 | /// INTERNAL ONLY |
---|
584 | static char_class_pair const &char_class(std::size_t j) |
---|
585 | { |
---|
586 | static char_class_pair const s_char_class_map[] = |
---|
587 | { |
---|
588 | { BOOST_XPR_CSTR_(char_type, "alnum"), detail::std_ctype_alnum } |
---|
589 | , { BOOST_XPR_CSTR_(char_type, "alpha"), detail::std_ctype_alpha } |
---|
590 | , { BOOST_XPR_CSTR_(char_type, "blank"), detail::non_std_ctype_blank } |
---|
591 | , { BOOST_XPR_CSTR_(char_type, "cntrl"), detail::std_ctype_cntrl } |
---|
592 | , { BOOST_XPR_CSTR_(char_type, "d"), detail::std_ctype_digit } |
---|
593 | , { BOOST_XPR_CSTR_(char_type, "digit"), detail::std_ctype_digit } |
---|
594 | , { BOOST_XPR_CSTR_(char_type, "graph"), detail::std_ctype_graph } |
---|
595 | , { BOOST_XPR_CSTR_(char_type, "lower"), detail::std_ctype_lower } |
---|
596 | , { BOOST_XPR_CSTR_(char_type, "newline"),detail::non_std_ctype_newline } |
---|
597 | , { BOOST_XPR_CSTR_(char_type, "print"), detail::std_ctype_print } |
---|
598 | , { BOOST_XPR_CSTR_(char_type, "punct"), detail::std_ctype_punct } |
---|
599 | , { BOOST_XPR_CSTR_(char_type, "s"), detail::std_ctype_space } |
---|
600 | , { BOOST_XPR_CSTR_(char_type, "space"), detail::std_ctype_space } |
---|
601 | , { BOOST_XPR_CSTR_(char_type, "upper"), detail::std_ctype_upper } |
---|
602 | , { BOOST_XPR_CSTR_(char_type, "w"), detail::std_ctype_alnum | detail::non_std_ctype_underscore } |
---|
603 | , { BOOST_XPR_CSTR_(char_type, "xdigit"), detail::std_ctype_xdigit } |
---|
604 | , { 0, 0 } |
---|
605 | }; |
---|
606 | return s_char_class_map[j]; |
---|
607 | } |
---|
608 | |
---|
609 | /////////////////////////////////////////////////////////////////////////////// |
---|
610 | // lookup_classname_impl |
---|
611 | /// INTERNAL ONLY |
---|
612 | template<typename FwdIter> |
---|
613 | static char_class_type lookup_classname_impl_(FwdIter begin, FwdIter end) |
---|
614 | { |
---|
615 | // find the classname |
---|
616 | typedef cpp_regex_traits<Char> this_t; |
---|
617 | for(std::size_t j = 0; 0 != this_t::char_class(j).class_name_; ++j) |
---|
618 | { |
---|
619 | if(this_t::compare_(this_t::char_class(j).class_name_, begin, end)) |
---|
620 | { |
---|
621 | return this_t::char_class(j).class_type_; |
---|
622 | } |
---|
623 | } |
---|
624 | return 0; |
---|
625 | } |
---|
626 | |
---|
627 | /// INTERNAL ONLY |
---|
628 | template<typename FwdIter> |
---|
629 | static bool compare_(char_type const *name, FwdIter begin, FwdIter end) |
---|
630 | { |
---|
631 | for(; *name && begin != end; ++name, ++begin) |
---|
632 | { |
---|
633 | if(*name != *begin) |
---|
634 | { |
---|
635 | return false; |
---|
636 | } |
---|
637 | } |
---|
638 | return !*name && begin == end; |
---|
639 | } |
---|
640 | |
---|
641 | locale_type loc_; |
---|
642 | std::ctype<char_type> const *ctype_; |
---|
643 | //std::collate<char_type> const *collate_; |
---|
644 | }; |
---|
645 | |
---|
646 | /////////////////////////////////////////////////////////////////////////////// |
---|
647 | // cpp_regex_traits<>::hash specializations |
---|
648 | template<> |
---|
649 | inline unsigned char cpp_regex_traits<unsigned char>::hash(unsigned char ch) |
---|
650 | { |
---|
651 | return ch; |
---|
652 | } |
---|
653 | |
---|
654 | template<> |
---|
655 | inline unsigned char cpp_regex_traits<char>::hash(char ch) |
---|
656 | { |
---|
657 | return static_cast<unsigned char>(ch); |
---|
658 | } |
---|
659 | |
---|
660 | template<> |
---|
661 | inline unsigned char cpp_regex_traits<signed char>::hash(signed char ch) |
---|
662 | { |
---|
663 | return static_cast<unsigned char>(ch); |
---|
664 | } |
---|
665 | |
---|
666 | #ifndef BOOST_XPRESSIVE_NO_WREGEX |
---|
667 | template<> |
---|
668 | inline unsigned char cpp_regex_traits<wchar_t>::hash(wchar_t ch) |
---|
669 | { |
---|
670 | return static_cast<unsigned char>(ch); |
---|
671 | } |
---|
672 | #endif |
---|
673 | |
---|
674 | }} |
---|
675 | |
---|
676 | #endif |
---|