| 1 | ////////////////////////////////////////////////////////////////////////////// |
|---|
| 2 | /// \file c_regex_traits.hpp |
|---|
| 3 | /// Contains the definition of the c_regex_traits\<\> template, which is a |
|---|
| 4 | /// wrapper for the C locale functions that can be used to customize the |
|---|
| 5 | /// behavior of static and dynamic regexes. |
|---|
| 6 | // |
|---|
| 7 | // Copyright 2004 Eric Niebler. Distributed under the Boost |
|---|
| 8 | // Software License, Version 1.0. (See accompanying file |
|---|
| 9 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
|---|
| 10 | |
|---|
| 11 | #ifndef BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005 |
|---|
| 12 | #define BOOST_XPRESSIVE_TRAITS_C_REGEX_TRAITS_HPP_EAN_10_04_2005 |
|---|
| 13 | |
|---|
| 14 | // MS compatible compilers support #pragma once |
|---|
| 15 | #if defined(_MSC_VER) && (_MSC_VER >= 1020) |
|---|
| 16 | # pragma once |
|---|
| 17 | #endif |
|---|
| 18 | |
|---|
| 19 | #include <boost/config.hpp> |
|---|
| 20 | #include <boost/assert.hpp> |
|---|
| 21 | #include <boost/xpressive/traits/detail/c_ctype.hpp> |
|---|
| 22 | |
|---|
| 23 | namespace boost { namespace xpressive |
|---|
| 24 | { |
|---|
| 25 | |
|---|
| 26 | namespace detail |
|---|
| 27 | { |
|---|
///////////////////////////////////////////////////////////////////////////////
// empty_locale
//
/// Stateless placeholder type with no members. c_regex_traits exposes it as
/// its locale_type.
struct empty_locale {};
|---|
| 33 | |
|---|
///////////////////////////////////////////////////////////////////////////////
// c_regex_traits_base
//
/// Base class of c_regex_traits, selected on the character type and on its
/// size. Every variant exposes the same protected imbue() hook, which accepts
/// any traits object and does nothing with it.
template<typename Char, std::size_t SizeOfChar = sizeof(Char)>
struct c_regex_traits_base
{
protected:
    /// No-op.
    ///
    /// Declared static with an unnamed parameter, exactly like the two partial
    /// specializations, so that all variants can be invoked the same way and
    /// none of them triggers an unused-parameter warning.
    template<typename Traits>
    static void imbue(Traits const &)
    {
    }
};

/// Partial specialization chosen when the size of the character type is 1.
template<typename Char>
struct c_regex_traits_base<Char, 1>
{
protected:
    /// No-op.
    template<typename Traits>
    static void imbue(Traits const &)
    {
    }
};

#ifndef BOOST_XPRESSIVE_NO_WREGEX
/// Partial specialization for wchar_t; compiled only when
/// BOOST_XPRESSIVE_NO_WREGEX is not defined.
template<std::size_t SizeOfChar>
struct c_regex_traits_base<wchar_t, SizeOfChar>
{
protected:
    /// No-op.
    template<typename Traits>
    static void imbue(Traits const &)
    {
    }
};
#endif
|---|
| 67 | |
|---|
/// Lower-cases a character through the C library. Only declared here; the
/// definitions are the explicit specializations that follow.
template<typename Char>
Char c_tolower(Char);

/// Upper-cases a character through the C library. Only declared here; the
/// definitions are the explicit specializations that follow.
template<typename Char>
Char c_toupper(Char);

/// c_tolower for char, implemented with tolower().
///
/// \param ch The source character.
/// \return The result of tolower() converted back to char.
template<>
inline char c_tolower(char ch)
{
    using namespace std;
    // tolower() works on int values in the unsigned char range, so the input
    // goes through unsigned char first; the int result is narrowed explicitly
    // instead of relying on an implicit int -> char conversion.
    return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
}

/// c_toupper for char, implemented with toupper().
///
/// \param ch The source character.
/// \return The result of toupper() converted back to char.
template<>
inline char c_toupper(char ch)
{
    using namespace std;
    // Same conversion path as c_tolower<char>.
    return static_cast<char>(toupper(static_cast<unsigned char>(ch)));
}

#ifndef BOOST_XPRESSIVE_NO_WREGEX
/// c_tolower for wchar_t, implemented with towlower().
///
/// \param ch The source character.
/// \return The result of towlower() converted back to wchar_t.
template<>
inline wchar_t c_tolower(wchar_t ch)
{
    using namespace std;
    // towlower() returns wint_t; make the conversion to wchar_t explicit.
    return static_cast<wchar_t>(towlower(ch));
}

/// c_toupper for wchar_t, implemented with towupper().
///
/// \param ch The source character.
/// \return The result of towupper() converted back to wchar_t.
template<>
inline wchar_t c_toupper(wchar_t ch)
{
    using namespace std;
    // towupper() returns wint_t; make the conversion to wchar_t explicit.
    return static_cast<wchar_t>(towupper(ch));
}
#endif
|---|
| 103 | |
|---|
| 104 | } // namespace detail |
|---|
| 105 | |
|---|
///////////////////////////////////////////////////////////////////////////////
// regex_traits_version_1_tag
//
/// Forward declaration only. The tag is named here so that c_regex_traits can
/// expose it as its version_tag typedef.
struct regex_traits_version_1_tag;
|---|
| 110 | |
|---|
| 111 | /////////////////////////////////////////////////////////////////////////////// |
|---|
| 112 | // c_regex_traits |
|---|
| 113 | // |
|---|
| 114 | /// \brief Encapsaulates the standard C locale functions for use by the |
|---|
| 115 | /// basic_regex\<\> class template. |
|---|
| 116 | template<typename Char> |
|---|
| 117 | struct c_regex_traits |
|---|
| 118 | : detail::c_regex_traits_base<Char> |
|---|
| 119 | { |
|---|
| 120 | typedef Char char_type; |
|---|
| 121 | typedef std::basic_string<char_type> string_type; |
|---|
| 122 | typedef detail::empty_locale locale_type; |
|---|
| 123 | typedef typename detail::char_class_impl<Char>::char_class_type char_class_type; |
|---|
| 124 | typedef regex_traits_version_1_tag version_tag; |
|---|
| 125 | typedef detail::c_regex_traits_base<Char> base_type; |
|---|
| 126 | |
|---|
| 127 | /// Initialize a c_regex_traits object to use the global C locale. |
|---|
| 128 | /// |
|---|
| 129 | c_regex_traits(locale_type const &loc = locale_type()) |
|---|
| 130 | : base_type() |
|---|
| 131 | { |
|---|
| 132 | this->imbue(loc); |
|---|
| 133 | } |
|---|
| 134 | |
|---|
| 135 | /// Checks two c_regex_traits objects for equality |
|---|
| 136 | /// |
|---|
| 137 | /// \return true. |
|---|
| 138 | bool operator ==(c_regex_traits<char_type> const &that) const |
|---|
| 139 | { |
|---|
| 140 | return true; |
|---|
| 141 | } |
|---|
| 142 | |
|---|
| 143 | /// Checks two c_regex_traits objects for inequality |
|---|
| 144 | /// |
|---|
| 145 | /// \return false. |
|---|
| 146 | bool operator !=(c_regex_traits<char_type> const &that) const |
|---|
| 147 | { |
|---|
| 148 | return false; |
|---|
| 149 | } |
|---|
| 150 | |
|---|
| 151 | /// Convert a char to a Char |
|---|
| 152 | /// |
|---|
| 153 | /// \param ch The source character. |
|---|
| 154 | /// \return ch if Char is char, std::btowc(ch) if Char is wchar_t. |
|---|
| 155 | static char_type widen(char ch); |
|---|
| 156 | |
|---|
| 157 | /// Returns a hash value for a Char in the range [0, UCHAR_MAX] |
|---|
| 158 | /// |
|---|
| 159 | /// \param ch The source character. |
|---|
| 160 | /// \return a value between 0 and UCHAR_MAX, inclusive. |
|---|
| 161 | static unsigned char hash(char_type ch) |
|---|
| 162 | { |
|---|
| 163 | return static_cast<unsigned char>(std::char_traits<Char>::to_int_type(ch)); |
|---|
| 164 | } |
|---|
| 165 | |
|---|
| 166 | /// No-op |
|---|
| 167 | /// |
|---|
| 168 | /// \param ch The source character. |
|---|
| 169 | /// \return ch |
|---|
| 170 | static char_type translate(char_type ch) |
|---|
| 171 | { |
|---|
| 172 | return ch; |
|---|
| 173 | } |
|---|
| 174 | |
|---|
| 175 | /// Converts a character to lower-case using the current global C locale. |
|---|
| 176 | /// |
|---|
| 177 | /// \param ch The source character. |
|---|
| 178 | /// \return std::tolower(ch) if Char is char, std::towlower(ch) if Char is wchar_t. |
|---|
| 179 | static char_type translate_nocase(char_type ch) |
|---|
| 180 | { |
|---|
| 181 | return detail::c_tolower(ch); |
|---|
| 182 | } |
|---|
| 183 | |
|---|
| 184 | /// Checks to see if a character is within a character range. |
|---|
| 185 | /// |
|---|
| 186 | /// \param first The bottom of the range, inclusive. |
|---|
| 187 | /// \param last The top of the range, inclusive. |
|---|
| 188 | /// \param ch The source character. |
|---|
| 189 | /// \return first <= ch && ch <= last. |
|---|
| 190 | static bool in_range(char_type first, char_type last, char_type ch) |
|---|
| 191 | { |
|---|
| 192 | return first <= ch && ch <= last; |
|---|
| 193 | } |
|---|
| 194 | |
|---|
| 195 | /// Checks to see if a character is within a character range, irregardless of case. |
|---|
| 196 | /// |
|---|
| 197 | /// \param first The bottom of the range, inclusive. |
|---|
| 198 | /// \param last The top of the range, inclusive. |
|---|
| 199 | /// \param ch The source character. |
|---|
| 200 | /// \return in_range(first, last, ch) || in_range(first, last, tolower(ch)) || in_range(first, |
|---|
| 201 | /// last, toupper(ch)) |
|---|
| 202 | /// \attention The default implementation doesn't do proper Unicode |
|---|
| 203 | /// case folding, but this is the best we can do with the standard |
|---|
| 204 | /// C locale functions. |
|---|
| 205 | static bool in_range_nocase(char_type first, char_type last, char_type ch) |
|---|
| 206 | { |
|---|
| 207 | return c_regex_traits::in_range(first, last, ch) |
|---|
| 208 | || c_regex_traits::in_range(first, last, detail::c_tolower(ch)) |
|---|
| 209 | || c_regex_traits::in_range(first, last, detail::c_toupper(ch)); |
|---|
| 210 | } |
|---|
| 211 | |
|---|
| 212 | /// Returns a sort key for the character sequence designated by the iterator range [F1, F2) |
|---|
| 213 | /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2) |
|---|
| 214 | /// then v.transform(G1, G2) < v.transform(H1, H2). |
|---|
| 215 | /// |
|---|
| 216 | /// \attention Not used in xpressive 1.0 |
|---|
| 217 | template<typename FwdIter> |
|---|
| 218 | static string_type transform(FwdIter begin, FwdIter end) |
|---|
| 219 | { |
|---|
| 220 | BOOST_ASSERT(false); // BUGBUG implement me |
|---|
| 221 | } |
|---|
| 222 | |
|---|
| 223 | /// Returns a sort key for the character sequence designated by the iterator range [F1, F2) |
|---|
| 224 | /// such that if the character sequence [G1, G2) sorts before the character sequence [H1, H2) |
|---|
| 225 | /// when character case is not considered then |
|---|
| 226 | /// v.transform_primary(G1, G2) < v.transform_primary(H1, H2). |
|---|
| 227 | /// |
|---|
| 228 | /// \attention Not used in xpressive 1.0 |
|---|
| 229 | template<typename FwdIter> |
|---|
| 230 | static string_type transform_primary(FwdIter begin, FwdIter end) |
|---|
| 231 | { |
|---|
| 232 | BOOST_ASSERT(false); // BUGBUG implement me |
|---|
| 233 | } |
|---|
| 234 | |
|---|
| 235 | /// Returns a sequence of characters that represents the collating element |
|---|
| 236 | /// consisting of the character sequence designated by the iterator range [F1, F2). |
|---|
| 237 | /// Returns an empty string if the character sequence is not a valid collating element. |
|---|
| 238 | /// |
|---|
| 239 | /// \attention Not used in xpressive 1.0 |
|---|
| 240 | template<typename FwdIter> |
|---|
| 241 | static string_type lookup_collatename(FwdIter begin, FwdIter end) |
|---|
| 242 | { |
|---|
| 243 | BOOST_ASSERT(false); // BUGBUG implement me |
|---|
| 244 | } |
|---|
| 245 | |
|---|
| 246 | /// For the character class name represented by the specified character sequence, |
|---|
| 247 | /// return the corresponding bitmask representation. |
|---|
| 248 | /// |
|---|
| 249 | /// \param begin A forward iterator to the start of the character sequence representing |
|---|
| 250 | /// the name of the character class. |
|---|
| 251 | /// \param end The end of the character sequence. |
|---|
| 252 | /// \param icase Specifies whether the returned bitmask should represent the case-insensitive |
|---|
| 253 | /// version of the character class. |
|---|
| 254 | /// \return A bitmask representing the character class. |
|---|
| 255 | template<typename FwdIter> |
|---|
| 256 | static char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase) |
|---|
| 257 | { |
|---|
| 258 | return detail::char_class_impl<char_type>::lookup_classname(begin, end, icase); |
|---|
| 259 | } |
|---|
| 260 | |
|---|
| 261 | /// Tests a character against a character class bitmask. |
|---|
| 262 | /// |
|---|
| 263 | /// \param ch The character to test. |
|---|
| 264 | /// \param mask The character class bitmask against which to test. |
|---|
| 265 | /// \pre mask is a bitmask returned by lookup_classname, or is several such masks bit-or'ed |
|---|
| 266 | /// together. |
|---|
| 267 | /// \return true if the character is a member of any of the specified character classes, false |
|---|
| 268 | /// otherwise. |
|---|
| 269 | static bool isctype(char_type ch, char_class_type mask) |
|---|
| 270 | { |
|---|
| 271 | return detail::char_class_impl<char_type>::isctype(ch, mask); |
|---|
| 272 | } |
|---|
| 273 | |
|---|
| 274 | /// Convert a digit character into the integer it represents. |
|---|
| 275 | /// |
|---|
| 276 | /// \param ch The digit character. |
|---|
| 277 | /// \param radix The radix to use for the conversion. |
|---|
| 278 | /// \pre radix is one of 8, 10, or 16. |
|---|
| 279 | /// \return -1 if ch is not a digit character, the integer value of the character otherwise. If |
|---|
| 280 | /// char_type is char, std::strtol is used for the conversion. If char_type is wchar_t, |
|---|
| 281 | /// std::wcstol is used. |
|---|
| 282 | static int value(char_type ch, int radix); |
|---|
| 283 | |
|---|
| 284 | /// No-op |
|---|
| 285 | /// |
|---|
| 286 | locale_type imbue(locale_type loc) |
|---|
| 287 | { |
|---|
| 288 | this->base_type::imbue(*this); |
|---|
| 289 | return loc; |
|---|
| 290 | } |
|---|
| 291 | |
|---|
| 292 | /// No-op |
|---|
| 293 | /// |
|---|
| 294 | static locale_type getloc() |
|---|
| 295 | { |
|---|
| 296 | locale_type loc; |
|---|
| 297 | return loc; |
|---|
| 298 | } |
|---|
| 299 | }; |
|---|
| 300 | |
|---|
| 301 | /////////////////////////////////////////////////////////////////////////////// |
|---|
| 302 | // c_regex_traits<>::widen specializations |
|---|
| 303 | /// INTERNAL ONLY |
|---|
| 304 | template<> |
|---|
| 305 | inline char c_regex_traits<char>::widen(char ch) |
|---|
| 306 | { |
|---|
| 307 | return ch; |
|---|
| 308 | } |
|---|
| 309 | |
|---|
| 310 | #ifndef BOOST_XPRESSIVE_NO_WREGEX |
|---|
| 311 | /// INTERNAL ONLY |
|---|
| 312 | template<> |
|---|
| 313 | inline wchar_t c_regex_traits<wchar_t>::widen(char ch) |
|---|
| 314 | { |
|---|
| 315 | using namespace std; |
|---|
| 316 | return btowc(ch); |
|---|
| 317 | } |
|---|
| 318 | #endif |
|---|
| 319 | |
|---|
| 320 | /////////////////////////////////////////////////////////////////////////////// |
|---|
| 321 | // c_regex_traits<>::hash specializations |
|---|
| 322 | /// INTERNAL ONLY |
|---|
| 323 | template<> |
|---|
| 324 | inline unsigned char c_regex_traits<char>::hash(char ch) |
|---|
| 325 | { |
|---|
| 326 | return static_cast<unsigned char>(ch); |
|---|
| 327 | } |
|---|
| 328 | |
|---|
| 329 | #ifndef BOOST_XPRESSIVE_NO_WREGEX |
|---|
| 330 | /// INTERNAL ONLY |
|---|
| 331 | template<> |
|---|
| 332 | inline unsigned char c_regex_traits<wchar_t>::hash(wchar_t ch) |
|---|
| 333 | { |
|---|
| 334 | return static_cast<unsigned char>(ch); |
|---|
| 335 | } |
|---|
| 336 | #endif |
|---|
| 337 | |
|---|
| 338 | /////////////////////////////////////////////////////////////////////////////// |
|---|
| 339 | // c_regex_traits<>::value specializations |
|---|
| 340 | /// INTERNAL ONLY |
|---|
| 341 | template<> |
|---|
| 342 | inline int c_regex_traits<char>::value(char ch, int radix) |
|---|
| 343 | { |
|---|
| 344 | using namespace std; |
|---|
| 345 | BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix); |
|---|
| 346 | char begin[2] = { ch, '\0' }, *end = 0; |
|---|
| 347 | int val = strtol(begin, &end, radix); |
|---|
| 348 | return begin == end ? -1 : val; |
|---|
| 349 | } |
|---|
| 350 | |
|---|
| 351 | #ifndef BOOST_XPRESSIVE_NO_WREGEX |
|---|
| 352 | /// INTERNAL ONLY |
|---|
| 353 | template<> |
|---|
| 354 | inline int c_regex_traits<wchar_t>::value(wchar_t ch, int radix) |
|---|
| 355 | { |
|---|
| 356 | using namespace std; |
|---|
| 357 | BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix); |
|---|
| 358 | wchar_t begin[2] = { ch, L'\0' }, *end = 0; |
|---|
| 359 | int val = wcstol(begin, &end, radix); |
|---|
| 360 | return begin == end ? -1 : val; |
|---|
| 361 | } |
|---|
| 362 | #endif |
|---|
| 363 | |
|---|
| 364 | }} |
|---|
| 365 | |
|---|
| 366 | #endif |
|---|