[29] | 1 | /////////////////////////////////////////////////////////////////////////////// |
---|
| 2 | /// \file regex_compiler.hpp |
---|
| 3 | /// Contains the definition of regex_compiler, a factory for building regex objects |
---|
| 4 | /// from strings. |
---|
| 5 | // |
---|
| 6 | // Copyright 2004 Eric Niebler. Distributed under the Boost |
---|
| 7 | // Software License, Version 1.0. (See accompanying file |
---|
| 8 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
---|
| 9 | |
---|
| 10 | #ifndef BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005 |
---|
| 11 | #define BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005 |
---|
| 12 | |
---|
| 13 | // MS compatible compilers support #pragma once |
---|
| 14 | #if defined(_MSC_VER) && (_MSC_VER >= 1020) |
---|
| 15 | # pragma once |
---|
| 16 | #endif |
---|
| 17 | |
---|
| 18 | #include <boost/xpressive/basic_regex.hpp> |
---|
| 19 | #include <boost/xpressive/detail/dynamic/parser.hpp> |
---|
| 20 | #include <boost/xpressive/detail/dynamic/parse_charset.hpp> |
---|
| 21 | #include <boost/xpressive/detail/dynamic/parser_enum.hpp> |
---|
| 22 | #include <boost/xpressive/detail/dynamic/parser_traits.hpp> |
---|
| 23 | #include <boost/xpressive/detail/core/linker.hpp> |
---|
| 24 | #include <boost/xpressive/detail/core/optimize.hpp> |
---|
| 25 | |
---|
| 26 | namespace boost { namespace xpressive |
---|
| 27 | { |
---|
| 28 | |
---|
| 29 | /////////////////////////////////////////////////////////////////////////////// |
---|
| 30 | // regex_compiler |
---|
| 31 | // |
---|
| 32 | /// \brief Class template regex_compiler is a factory for building basic_regex objects from a string. |
---|
| 33 | /// |
---|
| 34 | /// Class template regex_compiler is used to construct a basic_regex object from a string. The string |
---|
| 35 | /// should contain a valid regular expression. You can imbue a regex_compiler object with a locale, |
---|
| 36 | /// after which all basic_regex objects created with that regex_compiler object will use that locale. |
---|
| 37 | /// After creating a regex_compiler object, and optionally imbueing it with a locale, you can call the |
---|
| 38 | /// compile() method to construct a basic_regex object, passing it the string representing the regular |
---|
| 39 | /// expression. You can call compile() multiple times on the same regex_compiler object. Two basic_regex |
---|
| 40 | /// objects compiled from the same string will have different regex_id's. |
---|
| 41 | template<typename BidiIter, typename RegexTraits, typename CompilerTraits> |
---|
| 42 | struct regex_compiler |
---|
| 43 | { |
---|
| 44 | typedef BidiIter iterator_type; |
---|
| 45 | typedef typename iterator_value<BidiIter>::type char_type; |
---|
| 46 | typedef std::basic_string<char_type> string_type; |
---|
| 47 | typedef regex_constants::syntax_option_type flag_type; |
---|
| 48 | typedef RegexTraits traits_type; |
---|
| 49 | typedef typename traits_type::char_class_type char_class_type; |
---|
| 50 | typedef typename traits_type::locale_type locale_type; |
---|
| 51 | |
---|
| 52 | explicit regex_compiler(RegexTraits const &traits = RegexTraits()) |
---|
| 53 | : mark_count_(0) |
---|
| 54 | , hidden_mark_count_(0) |
---|
| 55 | , traits_(traits) |
---|
| 56 | , upper_(0) |
---|
| 57 | { |
---|
| 58 | this->upper_ = lookup_classname(this->rxtraits(), "upper"); |
---|
| 59 | BOOST_ASSERT(0 != this->upper_); |
---|
| 60 | } |
---|
| 61 | |
---|
| 62 | /////////////////////////////////////////////////////////////////////////// |
---|
| 63 | // imbue |
---|
| 64 | /// Specify the locale to be used by a regex_compiler. |
---|
| 65 | /// |
---|
| 66 | /// \param loc The locale that this regex_compiler should use. |
---|
| 67 | /// \return The previous locale. |
---|
| 68 | locale_type imbue(locale_type loc) |
---|
| 69 | { |
---|
| 70 | locale_type oldloc = this->traits_.imbue(loc); |
---|
| 71 | this->upper_ = lookup_classname(this->rxtraits(), "upper"); |
---|
| 72 | BOOST_ASSERT(0 != this->upper_); |
---|
| 73 | return oldloc; |
---|
| 74 | } |
---|
| 75 | |
---|
| 76 | /////////////////////////////////////////////////////////////////////////// |
---|
| 77 | // getloc |
---|
| 78 | /// Get the locale used by a regex_compiler. |
---|
| 79 | /// |
---|
| 80 | /// \param loc The locale that this regex_compiler uses. |
---|
| 81 | locale_type getloc() const |
---|
| 82 | { |
---|
| 83 | return this->traits_.getloc(); |
---|
| 84 | } |
---|
| 85 | |
---|
| 86 | /////////////////////////////////////////////////////////////////////////// |
---|
| 87 | // compile |
---|
| 88 | /// Builds a basic_regex object from a std::string. |
---|
| 89 | /// |
---|
| 90 | /// \param pat A std::string containing the regular expression pattern. |
---|
| 91 | /// \param flags Optional bitmask that determines how the pat string is interpreted. (See syntax_option_type.) |
---|
| 92 | /// \return A basic_regex object corresponding to the regular expression represented by the string. |
---|
| 93 | /// \pre The std::string pat contains a valid string-based representation of a regular expression. |
---|
| 94 | /// \throw regex_error when the string has invalid regular expression syntax. |
---|
| 95 | basic_regex<BidiIter> compile(string_type pat, flag_type flags = regex_constants::ECMAScript) |
---|
| 96 | { |
---|
| 97 | this->reset(); |
---|
| 98 | this->traits_.flags(flags); |
---|
| 99 | |
---|
| 100 | string_iterator begin = pat.begin(), end = pat.end(); |
---|
| 101 | |
---|
| 102 | // at the top level, a regex is a sequence of alternates |
---|
| 103 | alternates_list alternates; |
---|
| 104 | this->parse_alternates(begin, end, alternates); |
---|
| 105 | detail::ensure(begin == end, regex_constants::error_paren, "mismatched parenthesis"); |
---|
| 106 | |
---|
| 107 | // convert the alternates list to the appropriate matcher and terminate the sequence |
---|
| 108 | detail::sequence<BidiIter> seq = detail::alternates_to_matchable(alternates, alternates_factory()); |
---|
| 109 | seq += detail::make_dynamic_xpression<BidiIter>(detail::end_matcher()); |
---|
| 110 | |
---|
| 111 | // fill in the back-pointers by visiting the regex parse tree |
---|
| 112 | detail::xpression_linker<char_type> linker(this->rxtraits()); |
---|
| 113 | seq.first->link(linker); |
---|
| 114 | |
---|
| 115 | // bundle the regex information into a regex_impl object |
---|
| 116 | detail::regex_impl<BidiIter> impl; |
---|
| 117 | impl.xpr_ = seq.first; |
---|
| 118 | impl.traits_.reset(new RegexTraits(this->rxtraits())); |
---|
| 119 | impl.mark_count_ = this->mark_count_; |
---|
| 120 | impl.hidden_mark_count_ = this->hidden_mark_count_; |
---|
| 121 | |
---|
| 122 | // optimization: get the peek chars OR the boyer-moore search string |
---|
| 123 | detail::optimize_regex(impl, this->rxtraits(), detail::is_random<BidiIter>()); |
---|
| 124 | |
---|
| 125 | return detail::core_access<BidiIter>::make_regex(impl); |
---|
| 126 | } |
---|
| 127 | |
---|
| 128 | private: |
---|
| 129 | |
---|
| 130 | typedef typename string_type::const_iterator string_iterator; |
---|
| 131 | typedef std::list<detail::sequence<BidiIter> > alternates_list; |
---|
| 132 | typedef detail::escape_value<char_type, char_class_type> escape_value; |
---|
| 133 | typedef detail::alternates_factory_impl<BidiIter, traits_type> alternates_factory; |
---|
| 134 | |
---|
| 135 | /////////////////////////////////////////////////////////////////////////// |
---|
| 136 | // reset |
---|
| 137 | /// INTERNAL ONLY |
---|
| 138 | void reset() |
---|
| 139 | { |
---|
| 140 | this->mark_count_ = 0; |
---|
| 141 | this->hidden_mark_count_ = 0; |
---|
| 142 | this->traits_.flags(regex_constants::ECMAScript); |
---|
| 143 | } |
---|
| 144 | |
---|
| 145 | /////////////////////////////////////////////////////////////////////////// |
---|
| 146 | // regex_traits |
---|
| 147 | /// INTERNAL ONLY |
---|
| 148 | traits_type &rxtraits() |
---|
| 149 | { |
---|
| 150 | return this->traits_.traits(); |
---|
| 151 | } |
---|
| 152 | |
---|
| 153 | /////////////////////////////////////////////////////////////////////////// |
---|
| 154 | // regex_traits |
---|
| 155 | /// INTERNAL ONLY |
---|
| 156 | traits_type const &rxtraits() const |
---|
| 157 | { |
---|
| 158 | return this->traits_.traits(); |
---|
| 159 | } |
---|
| 160 | |
---|
| 161 | /////////////////////////////////////////////////////////////////////////// |
---|
| 162 | // parse_alternates |
---|
| 163 | /// INTERNAL ONLY |
---|
| 164 | void parse_alternates(string_iterator &begin, string_iterator end, alternates_list &alternates) |
---|
| 165 | { |
---|
| 166 | using namespace regex_constants; |
---|
| 167 | string_iterator old_begin; |
---|
| 168 | |
---|
| 169 | do |
---|
| 170 | { |
---|
| 171 | alternates.push_back(this->parse_sequence(begin, end)); |
---|
| 172 | old_begin = begin; |
---|
| 173 | } |
---|
| 174 | while(begin != end && token_alternate == this->traits_.get_token(begin, end)); |
---|
| 175 | |
---|
| 176 | begin = old_begin; |
---|
| 177 | } |
---|
| 178 | |
---|
| 179 | /////////////////////////////////////////////////////////////////////////// |
---|
| 180 | // parse_group |
---|
| 181 | /// INTERNAL ONLY |
---|
| 182 | detail::sequence<BidiIter> parse_group(string_iterator &begin, string_iterator end) |
---|
| 183 | { |
---|
| 184 | using namespace regex_constants; |
---|
| 185 | int mark_nbr = 0; |
---|
| 186 | bool keeper = false; |
---|
| 187 | bool lookahead = false; |
---|
| 188 | bool lookbehind = false; |
---|
| 189 | bool negative = false; |
---|
| 190 | std::size_t old_mark_count = this->mark_count_; |
---|
| 191 | |
---|
| 192 | detail::sequence<BidiIter> seq, seq_end; |
---|
| 193 | string_iterator tmp = string_iterator(); |
---|
| 194 | |
---|
| 195 | syntax_option_type old_flags = this->traits_.flags(); |
---|
| 196 | |
---|
| 197 | switch(this->traits_.get_group_type(begin, end)) |
---|
| 198 | { |
---|
| 199 | case token_no_mark: |
---|
| 200 | // Don't process empty groups like (?:) or (?i) |
---|
| 201 | // BUGBUG this doesn't handle the degenerate (?:)+ correctly |
---|
| 202 | if(token_group_end == this->traits_.get_token(tmp = begin, end)) |
---|
| 203 | { |
---|
| 204 | return this->parse_atom(begin = tmp, end); |
---|
| 205 | } |
---|
| 206 | break; |
---|
| 207 | |
---|
| 208 | case token_negative_lookahead: |
---|
| 209 | negative = true; // fall-through |
---|
| 210 | case token_positive_lookahead: |
---|
| 211 | lookahead = true; |
---|
| 212 | seq_end = detail::make_dynamic_xpression<BidiIter>(detail::true_matcher()); |
---|
| 213 | break; |
---|
| 214 | |
---|
| 215 | case token_negative_lookbehind: |
---|
| 216 | negative = true; // fall-through |
---|
| 217 | case token_positive_lookbehind: |
---|
| 218 | lookbehind = true; |
---|
| 219 | seq_end = detail::make_dynamic_xpression<BidiIter>(detail::true_matcher()); |
---|
| 220 | break; |
---|
| 221 | |
---|
| 222 | case token_independent_sub_expression: |
---|
| 223 | keeper = true; |
---|
| 224 | seq_end = detail::make_dynamic_xpression<BidiIter>(detail::true_matcher()); |
---|
| 225 | break; |
---|
| 226 | |
---|
| 227 | case token_comment: |
---|
| 228 | while(detail::ensure(begin != end, error_paren, "mismatched parenthesis")) |
---|
| 229 | { |
---|
| 230 | switch(this->traits_.get_token(begin, end)) |
---|
| 231 | { |
---|
| 232 | case token_group_end: return this->parse_atom(begin, end); |
---|
| 233 | case token_escape: detail::ensure(begin != end, error_escape, "incomplete escape sequence"); |
---|
| 234 | case token_literal: ++begin; |
---|
| 235 | default:; |
---|
| 236 | } |
---|
| 237 | } |
---|
| 238 | break; |
---|
| 239 | |
---|
| 240 | default: |
---|
| 241 | mark_nbr = static_cast<int>(++this->mark_count_); |
---|
| 242 | seq = detail::make_dynamic_xpression<BidiIter>(detail::mark_begin_matcher(mark_nbr)); |
---|
| 243 | seq_end = detail::make_dynamic_xpression<BidiIter>(detail::mark_end_matcher(mark_nbr)); |
---|
| 244 | break; |
---|
| 245 | } |
---|
| 246 | |
---|
| 247 | // alternates |
---|
| 248 | alternates_list alternates; |
---|
| 249 | this->parse_alternates(begin, end, alternates); |
---|
| 250 | detail::ensure |
---|
| 251 | ( |
---|
| 252 | begin != end && token_group_end == this->traits_.get_token(begin, end) |
---|
| 253 | , error_paren |
---|
| 254 | , "mismatched parenthesis" |
---|
| 255 | ); |
---|
| 256 | |
---|
| 257 | seq += detail::alternates_to_matchable(alternates, alternates_factory()); |
---|
| 258 | seq += seq_end; |
---|
| 259 | |
---|
| 260 | typedef shared_ptr<detail::matchable<BidiIter> const> xpr_type; |
---|
| 261 | bool do_save = (this->mark_count_ != old_mark_count); |
---|
| 262 | |
---|
| 263 | if(lookahead) |
---|
| 264 | { |
---|
| 265 | detail::lookahead_matcher<xpr_type> lookahead(seq.first, negative, do_save); |
---|
| 266 | seq = detail::make_dynamic_xpression<BidiIter>(lookahead); |
---|
| 267 | } |
---|
| 268 | else if(lookbehind) |
---|
| 269 | { |
---|
| 270 | detail::lookbehind_matcher<xpr_type> lookbehind(seq.first, negative, do_save); |
---|
| 271 | seq = detail::make_dynamic_xpression<BidiIter>(lookbehind); |
---|
| 272 | } |
---|
| 273 | else if(keeper) // independent sub-expression |
---|
| 274 | { |
---|
| 275 | detail::keeper_matcher<xpr_type> keeper(seq.first, do_save); |
---|
| 276 | seq = detail::make_dynamic_xpression<BidiIter>(keeper); |
---|
| 277 | } |
---|
| 278 | |
---|
| 279 | // restore the modifiers |
---|
| 280 | this->traits_.flags(old_flags); |
---|
| 281 | return seq; |
---|
| 282 | } |
---|
| 283 | |
---|
| 284 | /////////////////////////////////////////////////////////////////////////// |
---|
| 285 | // parse_charset |
---|
| 286 | /// INTERNAL ONLY |
---|
| 287 | detail::sequence<BidiIter> parse_charset(string_iterator &begin, string_iterator end) |
---|
| 288 | { |
---|
| 289 | detail::compound_charset<traits_type> chset; |
---|
| 290 | |
---|
| 291 | // call out to a helper to actually parse the character set |
---|
| 292 | detail::parse_charset(begin, end, chset, this->traits_); |
---|
| 293 | |
---|
| 294 | return detail::make_charset_xpression<BidiIter> |
---|
| 295 | ( |
---|
| 296 | chset |
---|
| 297 | , this->rxtraits() |
---|
| 298 | , this->traits_.flags() |
---|
| 299 | ); |
---|
| 300 | } |
---|
| 301 | |
---|
| 302 | /////////////////////////////////////////////////////////////////////////// |
---|
| 303 | // parse_atom |
---|
| 304 | /// INTERNAL ONLY |
---|
| 305 | detail::sequence<BidiIter> parse_atom(string_iterator &begin, string_iterator end) |
---|
| 306 | { |
---|
| 307 | using namespace regex_constants; |
---|
| 308 | escape_value esc = { 0, 0, 0, detail::escape_char }; |
---|
| 309 | string_iterator old_begin = begin; |
---|
| 310 | |
---|
| 311 | switch(this->traits_.get_token(begin, end)) |
---|
| 312 | { |
---|
| 313 | case token_literal: |
---|
| 314 | return detail::make_literal_xpression<BidiIter> |
---|
| 315 | ( |
---|
| 316 | this->parse_literal(begin, end), this->traits_.flags(), this->rxtraits() |
---|
| 317 | ); |
---|
| 318 | |
---|
| 319 | case token_any: |
---|
| 320 | return detail::make_any_xpression<BidiIter>(this->traits_.flags(), this->rxtraits()); |
---|
| 321 | |
---|
| 322 | case token_assert_begin_sequence: |
---|
| 323 | return detail::make_dynamic_xpression<BidiIter>(detail::assert_bos_matcher()); |
---|
| 324 | |
---|
| 325 | case token_assert_end_sequence: |
---|
| 326 | return detail::make_dynamic_xpression<BidiIter>(detail::assert_eos_matcher()); |
---|
| 327 | |
---|
| 328 | case token_assert_begin_line: |
---|
| 329 | return detail::make_assert_begin_line<BidiIter>(this->traits_.flags(), this->rxtraits()); |
---|
| 330 | |
---|
| 331 | case token_assert_end_line: |
---|
| 332 | return detail::make_assert_end_line<BidiIter>(this->traits_.flags(), this->rxtraits()); |
---|
| 333 | |
---|
| 334 | case token_assert_word_boundary: |
---|
| 335 | return detail::make_assert_word<BidiIter>(detail::word_boundary<true>(), this->rxtraits()); |
---|
| 336 | |
---|
| 337 | case token_assert_not_word_boundary: |
---|
| 338 | return detail::make_assert_word<BidiIter>(detail::word_boundary<false>(), this->rxtraits()); |
---|
| 339 | |
---|
| 340 | case token_assert_word_begin: |
---|
| 341 | return detail::make_assert_word<BidiIter>(detail::word_begin(), this->rxtraits()); |
---|
| 342 | |
---|
| 343 | case token_assert_word_end: |
---|
| 344 | return detail::make_assert_word<BidiIter>(detail::word_end(), this->rxtraits()); |
---|
| 345 | |
---|
| 346 | case token_escape: |
---|
| 347 | esc = this->parse_escape(begin, end); |
---|
| 348 | switch(esc.type_) |
---|
| 349 | { |
---|
| 350 | case detail::escape_mark: |
---|
| 351 | return detail::make_backref_xpression<BidiIter> |
---|
| 352 | ( |
---|
| 353 | esc.mark_nbr_, this->traits_.flags(), this->rxtraits() |
---|
| 354 | ); |
---|
| 355 | case detail::escape_char: |
---|
| 356 | return detail::make_char_xpression<BidiIter> |
---|
| 357 | ( |
---|
| 358 | esc.ch_, this->traits_.flags(), this->rxtraits() |
---|
| 359 | ); |
---|
| 360 | case detail::escape_class: |
---|
| 361 | return detail::make_posix_charset_xpression<BidiIter> |
---|
| 362 | ( |
---|
| 363 | esc.class_ |
---|
| 364 | , this->rxtraits().isctype(*begin++, this->upper_) |
---|
| 365 | , this->traits_.flags() |
---|
| 366 | , this->rxtraits() |
---|
| 367 | ); |
---|
| 368 | } |
---|
| 369 | |
---|
| 370 | case token_group_begin: |
---|
| 371 | return this->parse_group(begin, end); |
---|
| 372 | |
---|
| 373 | case token_charset_begin: |
---|
| 374 | return this->parse_charset(begin, end); |
---|
| 375 | |
---|
| 376 | case token_invalid_quantifier: |
---|
| 377 | throw regex_error(error_badrepeat, "quantifier not expected"); |
---|
| 378 | |
---|
| 379 | case token_quote_meta_begin: |
---|
| 380 | return detail::make_literal_xpression<BidiIter> |
---|
| 381 | ( |
---|
| 382 | this->parse_quote_meta(begin, end), this->traits_.flags(), this->rxtraits() |
---|
| 383 | ); |
---|
| 384 | |
---|
| 385 | case token_quote_meta_end: |
---|
| 386 | throw regex_error |
---|
| 387 | ( |
---|
| 388 | error_escape |
---|
| 389 | , "found quote-meta end without corresponding quote-meta begin" |
---|
| 390 | ); |
---|
| 391 | |
---|
| 392 | case token_end_of_pattern: |
---|
| 393 | break; |
---|
| 394 | |
---|
| 395 | default: |
---|
| 396 | begin = old_begin; |
---|
| 397 | break; |
---|
| 398 | } |
---|
| 399 | |
---|
| 400 | return detail::sequence<BidiIter>(); |
---|
| 401 | } |
---|
| 402 | |
---|
| 403 | /////////////////////////////////////////////////////////////////////////// |
---|
| 404 | // parse_quant |
---|
| 405 | /// INTERNAL ONLY |
---|
| 406 | detail::sequence<BidiIter> parse_quant(string_iterator &begin, string_iterator end) |
---|
| 407 | { |
---|
| 408 | BOOST_ASSERT(begin != end); |
---|
| 409 | detail::quant_spec spec = { 0, 0, false }; |
---|
| 410 | detail::sequence<BidiIter> seq = this->parse_atom(begin, end); |
---|
| 411 | |
---|
| 412 | // BUGBUG this doesn't handle the degenerate (?:)+ correctly |
---|
| 413 | if(!seq.is_empty() && begin != end && seq.first->is_quantifiable()) |
---|
| 414 | { |
---|
| 415 | if(this->traits_.get_quant_spec(begin, end, spec)) |
---|
| 416 | { |
---|
| 417 | BOOST_ASSERT(spec.min_ <= spec.max_); |
---|
| 418 | |
---|
| 419 | if(0 == spec.max_) // quant {0,0} is degenerate -- matches nothing. |
---|
| 420 | { |
---|
| 421 | seq = this->parse_quant(begin, end); |
---|
| 422 | } |
---|
| 423 | else |
---|
| 424 | { |
---|
| 425 | seq = seq.first->quantify(spec, this->hidden_mark_count_, seq, alternates_factory()); |
---|
| 426 | } |
---|
| 427 | } |
---|
| 428 | } |
---|
| 429 | |
---|
| 430 | return seq; |
---|
| 431 | } |
---|
| 432 | |
---|
| 433 | /////////////////////////////////////////////////////////////////////////// |
---|
| 434 | // parse_sequence |
---|
| 435 | /// INTERNAL ONLY |
---|
| 436 | detail::sequence<BidiIter> parse_sequence(string_iterator &begin, string_iterator end) |
---|
| 437 | { |
---|
| 438 | detail::sequence<BidiIter> seq; |
---|
| 439 | |
---|
| 440 | while(begin != end) |
---|
| 441 | { |
---|
| 442 | detail::sequence<BidiIter> seq_quant = this->parse_quant(begin, end); |
---|
| 443 | |
---|
| 444 | // did we find a quantified atom? |
---|
| 445 | if(seq_quant.is_empty()) |
---|
| 446 | break; |
---|
| 447 | |
---|
| 448 | // chain it to the end of the xpression sequence |
---|
| 449 | seq += seq_quant; |
---|
| 450 | } |
---|
| 451 | |
---|
| 452 | return seq; |
---|
| 453 | } |
---|
| 454 | |
---|
| 455 | /////////////////////////////////////////////////////////////////////////// |
---|
| 456 | // parse_literal |
---|
| 457 | // scan ahead looking for char literals to be globbed together into a string literal |
---|
| 458 | /// INTERNAL ONLY |
---|
| 459 | string_type parse_literal(string_iterator &begin, string_iterator end) |
---|
| 460 | { |
---|
| 461 | using namespace regex_constants; |
---|
| 462 | BOOST_ASSERT(begin != end); |
---|
| 463 | BOOST_ASSERT(token_literal == this->traits_.get_token(begin, end)); |
---|
| 464 | escape_value esc = { 0, 0, 0, detail::escape_char }; |
---|
| 465 | string_type literal(1, *begin); |
---|
| 466 | |
---|
| 467 | for(string_iterator prev = begin, tmp = ++begin; begin != end; prev = begin, begin = tmp) |
---|
| 468 | { |
---|
| 469 | detail::quant_spec spec; |
---|
| 470 | if(this->traits_.get_quant_spec(tmp, end, spec)) |
---|
| 471 | { |
---|
| 472 | if(literal.size() != 1) |
---|
| 473 | { |
---|
| 474 | begin = prev; |
---|
| 475 | literal.erase(literal.size() - 1); |
---|
| 476 | } |
---|
| 477 | return literal; |
---|
| 478 | } |
---|
| 479 | else switch(this->traits_.get_token(tmp, end)) |
---|
| 480 | { |
---|
| 481 | case token_escape: |
---|
| 482 | esc = this->parse_escape(tmp, end); |
---|
| 483 | if(detail::escape_char != esc.type_) return literal; |
---|
| 484 | literal += esc.ch_; |
---|
| 485 | break; |
---|
| 486 | case token_literal: |
---|
| 487 | literal += *tmp++; |
---|
| 488 | break; |
---|
| 489 | default: |
---|
| 490 | return literal; |
---|
| 491 | } |
---|
| 492 | } |
---|
| 493 | |
---|
| 494 | return literal; |
---|
| 495 | } |
---|
| 496 | |
---|
| 497 | /////////////////////////////////////////////////////////////////////////// |
---|
| 498 | // parse_quote_meta |
---|
| 499 | // scan ahead looking for char literals to be globbed together into a string literal |
---|
| 500 | /// INTERNAL ONLY |
---|
| 501 | string_type parse_quote_meta(string_iterator &begin, string_iterator end) |
---|
| 502 | { |
---|
| 503 | using namespace regex_constants; |
---|
| 504 | string_iterator old_begin = begin, old_end; |
---|
| 505 | while(end != (old_end = begin)) |
---|
| 506 | { |
---|
| 507 | switch(this->traits_.get_token(begin, end)) |
---|
| 508 | { |
---|
| 509 | case token_quote_meta_end: return string_type(old_begin, old_end); |
---|
| 510 | case token_escape: detail::ensure(begin != end, error_escape, "incomplete escape sequence"); |
---|
| 511 | case token_literal: ++begin; |
---|
| 512 | default:; |
---|
| 513 | } |
---|
| 514 | } |
---|
| 515 | return string_type(old_begin, begin); |
---|
| 516 | } |
---|
| 517 | |
---|
| 518 | /////////////////////////////////////////////////////////////////////////////// |
---|
| 519 | // parse_escape |
---|
| 520 | /// INTERNAL ONLY |
---|
| 521 | escape_value parse_escape(string_iterator &begin, string_iterator end) |
---|
| 522 | { |
---|
| 523 | detail::ensure(begin != end, regex_constants::error_escape, "incomplete escape sequence"); |
---|
| 524 | |
---|
| 525 | // first, check to see if this can be a backreference |
---|
| 526 | if(0 < this->rxtraits().value(*begin, 10)) |
---|
| 527 | { |
---|
| 528 | // Parse at most 3 decimal digits. |
---|
| 529 | string_iterator tmp = begin; |
---|
| 530 | int mark_nbr = detail::toi(tmp, end, this->rxtraits(), 10, 999); |
---|
| 531 | |
---|
| 532 | // If the resulting number could conceivably be a backref, then it is. |
---|
| 533 | if(10 > mark_nbr || mark_nbr <= static_cast<int>(this->mark_count_)) |
---|
| 534 | { |
---|
| 535 | begin = tmp; |
---|
| 536 | escape_value esc = {0, mark_nbr, 0, detail::escape_mark}; |
---|
| 537 | return esc; |
---|
| 538 | } |
---|
| 539 | } |
---|
| 540 | |
---|
| 541 | // Not a backreference, defer to the parse_escape helper |
---|
| 542 | return detail::parse_escape(begin, end, this->traits_); |
---|
| 543 | } |
---|
| 544 | |
---|
| 545 | std::size_t mark_count_; |
---|
| 546 | std::size_t hidden_mark_count_; |
---|
| 547 | CompilerTraits traits_; |
---|
| 548 | typename RegexTraits::char_class_type upper_; |
---|
| 549 | }; |
---|
| 550 | |
---|
| 551 | }} // namespace boost::xpressive |
---|
| 552 | |
---|
| 553 | #endif |
---|