| 1 | /* |
|---|
| 2 | * |
|---|
| 3 | * Copyright (c) 2004 |
|---|
| 4 | * John Maddock |
|---|
| 5 | * |
|---|
| 6 | * Use, modification and distribution are subject to the |
|---|
| 7 | * Boost Software License, Version 1.0. (See accompanying file |
|---|
| 8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
|---|
| 9 | * |
|---|
| 10 | */ |
|---|
| 11 | |
|---|
| 12 | /* |
|---|
| 13 | * LOCATION: see http://www.boost.org for most recent version. |
|---|
| 14 | * FILE test_icu.cpp |
|---|
| 15 | * VERSION see <boost/version.hpp> |
|---|
| 16 | * DESCRIPTION: Test code for Unicode regexes with ICU support. |
|---|
| 17 | */ |
|---|
| 18 | |
|---|
| 19 | // |
|---|
| 20 | // We can only build this if we have ICU support: |
|---|
| 21 | // |
|---|
| 22 | #include <boost/regex/config.hpp> |
|---|
| 23 | #if defined(BOOST_HAS_ICU) && !defined(BOOST_NO_STD_WSTRING) |
|---|
| 24 | |
|---|
| 25 | #include <boost/regex/icu.hpp> |
|---|
| 26 | #include "test.hpp" |
|---|
| 27 | |
|---|
namespace unnecessary_fix{
//
// Some standard libraries ship a back_insert_iterator that cannot be
// passed to regex_replace as the output iterator, so the tests use this
// minimal stand-in instead.
//
// The iterator typedefs are declared directly in the class rather than
// inherited from std::iterator: that base class is deprecated in C++17,
// and when it was compiled out (BOOST_NO_STD_ITERATOR) the inherited
// difference_type / pointer / reference typedefs vanished with it.
//
template <class Seq>
class back_insert_iterator
{
private:
   Seq* container;   // the sequence appended to; set from a reference, not owned
public:
   // const-qualified element type, as accepted by operator= below
   typedef const typename Seq::value_type value_type;
   typedef Seq                            container_type;
   typedef void                           difference_type;
   typedef void                           pointer;
   typedef void                           reference;
   typedef std::output_iterator_tag       iterator_category;

   explicit back_insert_iterator(Seq& x) : container(&x) {}

   // Assigning through the iterator appends val to the sequence.
   back_insert_iterator& operator=(const value_type& val)
   {
      container->push_back(val);
      return *this;
   }
   // Dereference and increment do nothing except hand back the iterator,
   // so that "*it++ = value" ends up in operator= above.
   back_insert_iterator& operator*() { return *this; }
   back_insert_iterator& operator++() { return *this; }
   back_insert_iterator operator++(int) { return *this; }
};

//
// Factory that deduces Seq from its argument and returns an iterator
// appending to x.
//
template <class Seq>
inline back_insert_iterator<Seq> back_inserter(Seq& x)
{
   return back_insert_iterator<Seq>(x);
}

}
| 65 | |
|---|
| 66 | // |
|---|
| 67 | // compare two match_results struct's for equality, |
|---|
| 68 | // converting the iterator as needed: |
|---|
| 69 | // |
|---|
| 70 | template <class MR1, class MR2> |
|---|
| 71 | void compare_result(const MR1& w1, const MR2& w2, boost::mpl::int_<2> const*) |
|---|
| 72 | { |
|---|
| 73 | typedef typename MR2::value_type MR2_value_type; |
|---|
| 74 | typedef typename MR2_value_type::const_iterator MR2_iterator_type; |
|---|
| 75 | typedef boost::u16_to_u32_iterator<MR2_iterator_type> iterator_type; |
|---|
| 76 | typedef typename MR1::size_type size_type; |
|---|
| 77 | if(w1.size() != w2.size()) |
|---|
| 78 | { |
|---|
| 79 | BOOST_REGEX_TEST_ERROR("Size mismatch in match_results class", UChar32); |
|---|
| 80 | } |
|---|
| 81 | for(int i = 0; i < (int)w1.size(); ++i) |
|---|
| 82 | { |
|---|
| 83 | if(w1[i].matched) |
|---|
| 84 | { |
|---|
| 85 | if(w2[i].matched == 0) |
|---|
| 86 | { |
|---|
| 87 | BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); |
|---|
| 88 | } |
|---|
| 89 | if((w1.position(i) != boost::re_detail::distance(iterator_type(w2.prefix().first), iterator_type(w2[i].first))) || (w1.length(i) != boost::re_detail::distance(iterator_type(w2[i].first), iterator_type(w2[i].second)))) |
|---|
| 90 | { |
|---|
| 91 | BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32); |
|---|
| 92 | } |
|---|
| 93 | } |
|---|
| 94 | else if(w2[i].matched) |
|---|
| 95 | { |
|---|
| 96 | BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); |
|---|
| 97 | } |
|---|
| 98 | } |
|---|
| 99 | } |
|---|
| 100 | template <class MR1, class MR2> |
|---|
| 101 | void compare_result(const MR1& w1, const MR2& w2, boost::mpl::int_<1> const*) |
|---|
| 102 | { |
|---|
| 103 | typedef typename MR2::value_type MR2_value_type; |
|---|
| 104 | typedef typename MR2_value_type::const_iterator MR2_iterator_type; |
|---|
| 105 | typedef boost::u8_to_u32_iterator<MR2_iterator_type> iterator_type; |
|---|
| 106 | typedef typename MR1::size_type size_type; |
|---|
| 107 | if(w1.size() != w2.size()) |
|---|
| 108 | { |
|---|
| 109 | BOOST_REGEX_TEST_ERROR("Size mismatch in match_results class", UChar32); |
|---|
| 110 | } |
|---|
| 111 | for(int i = 0; i < (int)w1.size(); ++i) |
|---|
| 112 | { |
|---|
| 113 | if(w1[i].matched) |
|---|
| 114 | { |
|---|
| 115 | if(w2[i].matched == 0) |
|---|
| 116 | { |
|---|
| 117 | BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); |
|---|
| 118 | } |
|---|
| 119 | if((w1.position(i) != boost::re_detail::distance(iterator_type(w2.prefix().first), iterator_type(w2[i].first))) || (w1.length(i) != boost::re_detail::distance(iterator_type(w2[i].first), iterator_type(w2[i].second)))) |
|---|
| 120 | { |
|---|
| 121 | BOOST_REGEX_TEST_ERROR("Iterator mismatch in match_results class", UChar32); |
|---|
| 122 | } |
|---|
| 123 | } |
|---|
| 124 | else if(w2[i].matched) |
|---|
| 125 | { |
|---|
| 126 | BOOST_REGEX_TEST_ERROR("Matched mismatch in match_results class", UChar32); |
|---|
| 127 | } |
|---|
| 128 | } |
|---|
| 129 | } |
|---|
| 130 | |
|---|
| 131 | void test_icu_grep(const boost::u32regex& r, const std::vector< ::UChar32>& search_text) |
|---|
| 132 | { |
|---|
| 133 | typedef std::vector< ::UChar32>::const_iterator const_iterator; |
|---|
| 134 | typedef boost::u32regex_iterator<const_iterator> test_iterator; |
|---|
| 135 | boost::regex_constants::match_flag_type opts = test_info<wchar_t>::match_options(); |
|---|
| 136 | const int* answer_table = test_info<wchar_t>::answer_table(); |
|---|
| 137 | test_iterator start(search_text.begin(), search_text.end(), r, opts), end; |
|---|
| 138 | test_iterator copy(start); |
|---|
| 139 | const_iterator last_end = search_text.begin(); |
|---|
| 140 | while(start != end) |
|---|
| 141 | { |
|---|
| 142 | if(start != copy) |
|---|
| 143 | { |
|---|
| 144 | BOOST_REGEX_TEST_ERROR("Failed iterator != comparison.", wchar_t); |
|---|
| 145 | } |
|---|
| 146 | if(!(start == copy)) |
|---|
| 147 | { |
|---|
| 148 | BOOST_REGEX_TEST_ERROR("Failed iterator == comparison.", wchar_t); |
|---|
| 149 | } |
|---|
| 150 | test_result(*start, search_text.begin(), answer_table); |
|---|
| 151 | // test $` and $' : |
|---|
| 152 | if(start->prefix().first != last_end) |
|---|
| 153 | { |
|---|
| 154 | BOOST_REGEX_TEST_ERROR("Incorrect position for start of $`", wchar_t); |
|---|
| 155 | } |
|---|
| 156 | if(start->prefix().second != (*start)[0].first) |
|---|
| 157 | { |
|---|
| 158 | BOOST_REGEX_TEST_ERROR("Incorrect position for end of $`", wchar_t); |
|---|
| 159 | } |
|---|
| 160 | if(start->prefix().matched != (start->prefix().first != start->prefix().second)) |
|---|
| 161 | { |
|---|
| 162 | BOOST_REGEX_TEST_ERROR("Incorrect position for matched member of $`", wchar_t); |
|---|
| 163 | } |
|---|
| 164 | if(start->suffix().first != (*start)[0].second) |
|---|
| 165 | { |
|---|
| 166 | BOOST_REGEX_TEST_ERROR("Incorrect position for start of $'", wchar_t); |
|---|
| 167 | } |
|---|
| 168 | if(start->suffix().second != search_text.end()) |
|---|
| 169 | { |
|---|
| 170 | BOOST_REGEX_TEST_ERROR("Incorrect position for end of $'", wchar_t); |
|---|
| 171 | } |
|---|
| 172 | if(start->suffix().matched != (start->suffix().first != start->suffix().second)) |
|---|
| 173 | { |
|---|
| 174 | BOOST_REGEX_TEST_ERROR("Incorrect position for matched member of $'", wchar_t); |
|---|
| 175 | } |
|---|
| 176 | last_end = (*start)[0].second; |
|---|
| 177 | ++start; |
|---|
| 178 | ++copy; |
|---|
| 179 | // move on the answer table to next set of answers; |
|---|
| 180 | if(*answer_table != -2) |
|---|
| 181 | while(*answer_table++ != -2){} |
|---|
| 182 | } |
|---|
| 183 | if(answer_table[0] >= 0) |
|---|
| 184 | { |
|---|
| 185 | // we should have had a match but didn't: |
|---|
| 186 | BOOST_REGEX_TEST_ERROR("Expected match was not found.", wchar_t); |
|---|
| 187 | } |
|---|
| 188 | } |
|---|
| 189 | |
|---|
| 190 | void test_icu(const wchar_t&, const test_regex_search_tag& ) |
|---|
| 191 | { |
|---|
| 192 | boost::u32regex r; |
|---|
| 193 | if(*test_locale::c_str()) |
|---|
| 194 | { |
|---|
| 195 | U_NAMESPACE_QUALIFIER Locale l(test_locale::c_str()); |
|---|
| 196 | if(l.isBogus()) |
|---|
| 197 | return; |
|---|
| 198 | r.imbue(l); |
|---|
| 199 | } |
|---|
| 200 | |
|---|
| 201 | std::vector< ::UChar32> expression; |
|---|
| 202 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS |
|---|
| 203 | expression.assign(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end()); |
|---|
| 204 | #else |
|---|
| 205 | std::copy(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end(), std::back_inserter(expression)); |
|---|
| 206 | #endif |
|---|
| 207 | boost::regex_constants::syntax_option_type syntax_options = test_info<UChar32>::syntax_options(); |
|---|
| 208 | try{ |
|---|
| 209 | #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) |
|---|
| 210 | r.assign(expression.begin(), expression.end(), syntax_options); |
|---|
| 211 | #else |
|---|
| 212 | if(expression.size()) |
|---|
| 213 | r.assign(&*expression.begin(), expression.size(), syntax_options); |
|---|
| 214 | else |
|---|
| 215 | r.assign(static_cast<UChar32 const*>(0), expression.size(), syntax_options); |
|---|
| 216 | #endif |
|---|
| 217 | if(r.status()) |
|---|
| 218 | { |
|---|
| 219 | BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done, error code = " << r.status(), UChar32); |
|---|
| 220 | } |
|---|
| 221 | std::vector< ::UChar32> search_text; |
|---|
| 222 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS |
|---|
| 223 | search_text.assign(test_info<wchar_t>::search_text().begin(), test_info<wchar_t>::search_text().end()); |
|---|
| 224 | #else |
|---|
| 225 | std::copy(test_info<wchar_t>::search_text().begin(), test_info<wchar_t>::search_text().end(), std::back_inserter(search_text)); |
|---|
| 226 | #endif |
|---|
| 227 | boost::regex_constants::match_flag_type opts = test_info<wchar_t>::match_options(); |
|---|
| 228 | const int* answer_table = test_info<wchar_t>::answer_table(); |
|---|
| 229 | boost::match_results<std::vector< ::UChar32>::const_iterator> what; |
|---|
| 230 | if(boost::u32regex_search( |
|---|
| 231 | const_cast<std::vector< ::UChar32>const&>(search_text).begin(), |
|---|
| 232 | const_cast<std::vector< ::UChar32>const&>(search_text).end(), |
|---|
| 233 | what, |
|---|
| 234 | r, |
|---|
| 235 | opts)) |
|---|
| 236 | { |
|---|
| 237 | test_result(what, const_cast<std::vector< ::UChar32>const&>(search_text).begin(), answer_table); |
|---|
| 238 | } |
|---|
| 239 | else if(answer_table[0] >= 0) |
|---|
| 240 | { |
|---|
| 241 | // we should have had a match but didn't: |
|---|
| 242 | BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32); |
|---|
| 243 | } |
|---|
| 244 | |
|---|
| 245 | if(0 == *test_locale::c_str()) |
|---|
| 246 | { |
|---|
| 247 | // |
|---|
| 248 | // Now try UTF-16 construction: |
|---|
| 249 | // |
|---|
| 250 | typedef boost::u32_to_u16_iterator<std::vector<UChar32>::const_iterator> u16_conv; |
|---|
| 251 | std::vector<UChar> expression16, text16; |
|---|
| 252 | boost::match_results<std::vector<UChar>::const_iterator> what16; |
|---|
| 253 | boost::match_results<const UChar*> what16c; |
|---|
| 254 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS |
|---|
| 255 | expression16.assign(u16_conv(expression.begin()), u16_conv(expression.end())); |
|---|
| 256 | text16.assign(u16_conv(search_text.begin()), u16_conv(search_text.end())); |
|---|
| 257 | #else |
|---|
| 258 | expression16.clear(); |
|---|
| 259 | std::copy(u16_conv(expression.begin()), u16_conv(expression.end()), std::back_inserter(expression16)); |
|---|
| 260 | text16.clear(); |
|---|
| 261 | std::copy(u16_conv(search_text.begin()), u16_conv(search_text.end()), std::back_inserter(text16)); |
|---|
| 262 | #endif |
|---|
| 263 | r = boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options); |
|---|
| 264 | if(boost::u32regex_search(const_cast<const std::vector<UChar>&>(text16).begin(), const_cast<const std::vector<UChar>&>(text16).end(), what16, r, opts)) |
|---|
| 265 | { |
|---|
| 266 | compare_result(what, what16, static_cast<boost::mpl::int_<2> const*>(0)); |
|---|
| 267 | } |
|---|
| 268 | else if(answer_table[0] >= 0) |
|---|
| 269 | { |
|---|
| 270 | // we should have had a match but didn't: |
|---|
| 271 | BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32); |
|---|
| 272 | } |
|---|
| 273 | if(std::find(expression16.begin(), expression16.end(), 0) == expression16.end()) |
|---|
| 274 | { |
|---|
| 275 | expression16.push_back(0); |
|---|
| 276 | r = boost::make_u32regex(&*expression16.begin(), syntax_options); |
|---|
| 277 | if(std::find(text16.begin(), text16.end(), 0) == text16.end()) |
|---|
| 278 | { |
|---|
| 279 | text16.push_back(0); |
|---|
| 280 | if(boost::u32regex_search((const UChar*)&*text16.begin(), what16c, r, opts)) |
|---|
| 281 | { |
|---|
| 282 | compare_result(what, what16c, static_cast<boost::mpl::int_<2> const*>(0)); |
|---|
| 283 | } |
|---|
| 284 | else if(answer_table[0] >= 0) |
|---|
| 285 | { |
|---|
| 286 | // we should have had a match but didn't: |
|---|
| 287 | BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32); |
|---|
| 288 | } |
|---|
| 289 | } |
|---|
| 290 | } |
|---|
| 291 | // |
|---|
| 292 | // Now try UTF-8 construction: |
|---|
| 293 | // |
|---|
| 294 | typedef boost::u32_to_u8_iterator<std::vector<UChar32>::const_iterator, unsigned char> u8_conv; |
|---|
| 295 | std::vector<unsigned char> expression8, text8; |
|---|
| 296 | boost::match_results<std::vector<unsigned char>::const_iterator> what8; |
|---|
| 297 | boost::match_results<const unsigned char*> what8c; |
|---|
| 298 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS |
|---|
| 299 | expression8.assign(u8_conv(expression.begin()), u8_conv(expression.end())); |
|---|
| 300 | text8.assign(u8_conv(search_text.begin()), u8_conv(search_text.end())); |
|---|
| 301 | #else |
|---|
| 302 | expression8.clear(); |
|---|
| 303 | std::copy(u8_conv(expression.begin()), u8_conv(expression.end()), std::back_inserter(expression8)); |
|---|
| 304 | text8.clear(); |
|---|
| 305 | std::copy(u8_conv(search_text.begin()), u8_conv(search_text.end()), std::back_inserter(text8)); |
|---|
| 306 | #endif |
|---|
| 307 | r = boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options); |
|---|
| 308 | if(boost::u32regex_search(const_cast<const std::vector<unsigned char>&>(text8).begin(), const_cast<const std::vector<unsigned char>&>(text8).end(), what8, r, opts)) |
|---|
| 309 | { |
|---|
| 310 | compare_result(what, what8, static_cast<boost::mpl::int_<1> const*>(0)); |
|---|
| 311 | } |
|---|
| 312 | else if(answer_table[0] >= 0) |
|---|
| 313 | { |
|---|
| 314 | // we should have had a match but didn't: |
|---|
| 315 | BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32); |
|---|
| 316 | } |
|---|
| 317 | if(std::find(expression8.begin(), expression8.end(), 0) == expression8.end()) |
|---|
| 318 | { |
|---|
| 319 | expression8.push_back(0); |
|---|
| 320 | r = boost::make_u32regex(&*expression8.begin(), syntax_options); |
|---|
| 321 | if(std::find(text8.begin(), text8.end(), 0) == text8.end()) |
|---|
| 322 | { |
|---|
| 323 | text8.push_back(0); |
|---|
| 324 | if(boost::u32regex_search((const unsigned char*)&*text8.begin(), what8c, r, opts)) |
|---|
| 325 | { |
|---|
| 326 | compare_result(what, what8c, static_cast<boost::mpl::int_<1> const*>(0)); |
|---|
| 327 | } |
|---|
| 328 | else if(answer_table[0] >= 0) |
|---|
| 329 | { |
|---|
| 330 | // we should have had a match but didn't: |
|---|
| 331 | BOOST_REGEX_TEST_ERROR("Expected match was not found.", UChar32); |
|---|
| 332 | } |
|---|
| 333 | } |
|---|
| 334 | } |
|---|
| 335 | } |
|---|
| 336 | // |
|---|
| 337 | // finally try a grep: |
|---|
| 338 | // |
|---|
| 339 | test_icu_grep(r, search_text); |
|---|
| 340 | } |
|---|
| 341 | catch(const boost::bad_expression& e) |
|---|
| 342 | { |
|---|
| 343 | BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done: " << e.what(), UChar32); |
|---|
| 344 | } |
|---|
| 345 | catch(const std::runtime_error& r) |
|---|
| 346 | { |
|---|
| 347 | BOOST_REGEX_TEST_ERROR("Received an unexpected std::runtime_error: " << r.what(), UChar32); |
|---|
| 348 | } |
|---|
| 349 | catch(const std::exception& r) |
|---|
| 350 | { |
|---|
| 351 | BOOST_REGEX_TEST_ERROR("Received an unexpected std::exception: " << r.what(), UChar32); |
|---|
| 352 | } |
|---|
| 353 | catch(...) |
|---|
| 354 | { |
|---|
| 355 | BOOST_REGEX_TEST_ERROR("Received an unexpected exception of unknown type", UChar32); |
|---|
| 356 | } |
|---|
| 357 | } |
|---|
| 358 | |
|---|
| 359 | void test_icu(const wchar_t&, const test_invalid_regex_tag&) |
|---|
| 360 | { |
|---|
| 361 | typedef boost::u16_to_u32_iterator<std::wstring::const_iterator, ::UChar32> conv_iterator; |
|---|
| 362 | std::vector< ::UChar32> expression; |
|---|
| 363 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS |
|---|
| 364 | expression.assign(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end()); |
|---|
| 365 | #else |
|---|
| 366 | std::copy(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end(), std::back_inserter(expression)); |
|---|
| 367 | #endif |
|---|
| 368 | boost::regex_constants::syntax_option_type syntax_options = test_info<wchar_t>::syntax_options(); |
|---|
| 369 | boost::u32regex r; |
|---|
| 370 | if(*test_locale::c_str()) |
|---|
| 371 | { |
|---|
| 372 | U_NAMESPACE_QUALIFIER Locale l(test_locale::c_str()); |
|---|
| 373 | if(l.isBogus()) |
|---|
| 374 | return; |
|---|
| 375 | r.imbue(l); |
|---|
| 376 | } |
|---|
| 377 | // |
|---|
| 378 | // try it with exceptions disabled first: |
|---|
| 379 | // |
|---|
| 380 | try |
|---|
| 381 | { |
|---|
| 382 | #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) |
|---|
| 383 | if(0 == r.assign(expression.begin(), expression.end(), syntax_options | boost::regex_constants::no_except).status()) |
|---|
| 384 | #else |
|---|
| 385 | if(expression.size()) |
|---|
| 386 | r.assign(&*expression.begin(), expression.size(), syntax_options | boost::regex_constants::no_except); |
|---|
| 387 | else |
|---|
| 388 | r.assign(static_cast<UChar32 const*>(0), static_cast<boost::u32regex::size_type>(0), syntax_options | boost::regex_constants::no_except); |
|---|
| 389 | if(0 == r.status()) |
|---|
| 390 | #endif |
|---|
| 391 | { |
|---|
| 392 | BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t); |
|---|
| 393 | } |
|---|
| 394 | } |
|---|
| 395 | catch(...) |
|---|
| 396 | { |
|---|
| 397 | BOOST_REGEX_TEST_ERROR("Unexpected exception thrown.", wchar_t); |
|---|
| 398 | } |
|---|
| 399 | // |
|---|
| 400 | // now try again with exceptions: |
|---|
| 401 | // |
|---|
| 402 | bool have_catch = false; |
|---|
| 403 | try{ |
|---|
| 404 | #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) |
|---|
| 405 | r.assign(expression.begin(), expression.end(), syntax_options); |
|---|
| 406 | #else |
|---|
| 407 | if(expression.size()) |
|---|
| 408 | r.assign(&*expression.begin(), expression.size(), syntax_options); |
|---|
| 409 | else |
|---|
| 410 | r.assign(static_cast<UChar32 const*>(0), static_cast<boost::u32regex::size_type>(0), syntax_options); |
|---|
| 411 | #endif |
|---|
| 412 | #ifdef BOOST_NO_EXCEPTIONS |
|---|
| 413 | if(r.status()) |
|---|
| 414 | have_catch = true; |
|---|
| 415 | #endif |
|---|
| 416 | } |
|---|
| 417 | catch(const boost::bad_expression&) |
|---|
| 418 | { |
|---|
| 419 | have_catch = true; |
|---|
| 420 | } |
|---|
| 421 | catch(const std::runtime_error& r) |
|---|
| 422 | { |
|---|
| 423 | have_catch = true; |
|---|
| 424 | BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but a std::runtime_error instead: " << r.what(), wchar_t); |
|---|
| 425 | } |
|---|
| 426 | catch(const std::exception& r) |
|---|
| 427 | { |
|---|
| 428 | have_catch = true; |
|---|
| 429 | BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but a std::exception instead: " << r.what(), wchar_t); |
|---|
| 430 | } |
|---|
| 431 | catch(...) |
|---|
| 432 | { |
|---|
| 433 | have_catch = true; |
|---|
| 434 | BOOST_REGEX_TEST_ERROR("Expected a bad_expression exception, but got an exception of unknown type instead", wchar_t); |
|---|
| 435 | } |
|---|
| 436 | if(!have_catch) |
|---|
| 437 | { |
|---|
| 438 | // oops expected exception was not thrown: |
|---|
| 439 | BOOST_REGEX_TEST_ERROR("Expected an exception, but didn't find one.", wchar_t); |
|---|
| 440 | } |
|---|
| 441 | |
|---|
| 442 | if(0 == *test_locale::c_str()) |
|---|
| 443 | { |
|---|
| 444 | // |
|---|
| 445 | // Now try UTF-16 construction: |
|---|
| 446 | // |
|---|
| 447 | typedef boost::u32_to_u16_iterator<std::vector<UChar32>::const_iterator> u16_conv; |
|---|
| 448 | std::vector<UChar> expression16; |
|---|
| 449 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS |
|---|
| 450 | expression16.assign(u16_conv(expression.begin()), u16_conv(expression.end())); |
|---|
| 451 | #else |
|---|
| 452 | std::copy(u16_conv(expression.begin()), u16_conv(expression.end()), std::back_inserter(expression16)); |
|---|
| 453 | #endif |
|---|
| 454 | if(0 == boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options | boost::regex_constants::no_except).status()) |
|---|
| 455 | { |
|---|
| 456 | BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t); |
|---|
| 457 | } |
|---|
| 458 | if(std::find(expression16.begin(), expression16.end(), 0) == expression16.end()) |
|---|
| 459 | { |
|---|
| 460 | expression16.push_back(0); |
|---|
| 461 | if(0 == boost::make_u32regex(&*expression16.begin(), syntax_options | boost::regex_constants::no_except).status()) |
|---|
| 462 | { |
|---|
| 463 | BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t); |
|---|
| 464 | } |
|---|
| 465 | } |
|---|
| 466 | // |
|---|
| 467 | // Now try UTF-8 construction: |
|---|
| 468 | // |
|---|
| 469 | typedef boost::u32_to_u8_iterator<std::vector<UChar32>::const_iterator> u8_conv; |
|---|
| 470 | std::vector<unsigned char> expression8; |
|---|
| 471 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS |
|---|
| 472 | expression8.assign(u8_conv(expression.begin()), u8_conv(expression.end())); |
|---|
| 473 | #else |
|---|
| 474 | std::copy(u8_conv(expression.begin()), u8_conv(expression.end()), std::back_inserter(expression8)); |
|---|
| 475 | #endif |
|---|
| 476 | if(0 == boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options | boost::regex_constants::no_except).status()) |
|---|
| 477 | { |
|---|
| 478 | BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t); |
|---|
| 479 | } |
|---|
| 480 | if(std::find(expression8.begin(), expression8.end(), 0) == expression8.end()) |
|---|
| 481 | { |
|---|
| 482 | expression8.push_back(0); |
|---|
| 483 | if(0 == boost::make_u32regex(&*expression8.begin(), syntax_options | boost::regex_constants::no_except).status()) |
|---|
| 484 | { |
|---|
| 485 | BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t); |
|---|
| 486 | } |
|---|
| 487 | } |
|---|
| 488 | } |
|---|
| 489 | } |
|---|
| 490 | |
|---|
| 491 | void test_icu(const wchar_t&, const test_regex_replace_tag&) |
|---|
| 492 | { |
|---|
| 493 | std::vector< ::UChar32> expression; |
|---|
| 494 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS |
|---|
| 495 | expression.assign(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end()); |
|---|
| 496 | #else |
|---|
| 497 | std::copy(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end(), std::back_inserter(expression)); |
|---|
| 498 | #endif |
|---|
| 499 | boost::regex_constants::syntax_option_type syntax_options = test_info<UChar32>::syntax_options(); |
|---|
| 500 | boost::u32regex r; |
|---|
| 501 | try{ |
|---|
| 502 | #if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) |
|---|
| 503 | r.assign(expression.begin(), expression.end(), syntax_options); |
|---|
| 504 | #else |
|---|
| 505 | if(expression.size()) |
|---|
| 506 | r.assign(&*expression.begin(), expression.size(), syntax_options); |
|---|
| 507 | else |
|---|
| 508 | r.assign(static_cast<UChar32 const*>(0), static_cast<boost::u32regex::size_type>(0), syntax_options); |
|---|
| 509 | #endif |
|---|
| 510 | if(r.status()) |
|---|
| 511 | { |
|---|
| 512 | BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done, error code = " << r.status(), UChar32); |
|---|
| 513 | } |
|---|
| 514 | typedef std::vector<UChar32> string_type; |
|---|
| 515 | string_type search_text; |
|---|
| 516 | boost::regex_constants::match_flag_type opts = test_info<UChar32>::match_options(); |
|---|
| 517 | string_type format_string; |
|---|
| 518 | string_type result_string; |
|---|
| 519 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS |
|---|
| 520 | search_text.assign(test_info<UChar32>::search_text().begin(), test_info<UChar32>::search_text().end()); |
|---|
| 521 | format_string.assign(test_info<UChar32>::format_string().begin(), test_info<UChar32>::format_string().end()); |
|---|
| 522 | format_string.push_back(0); |
|---|
| 523 | result_string.assign(test_info<UChar32>::result_string().begin(), test_info<UChar32>::result_string().end()); |
|---|
| 524 | #else |
|---|
| 525 | std::copy(test_info<UChar32>::search_text().begin(), test_info<UChar32>::search_text().end(), std::back_inserter(search_text)); |
|---|
| 526 | std::copy(test_info<UChar32>::format_string().begin(), test_info<UChar32>::format_string().end(), std::back_inserter(format_string)); |
|---|
| 527 | format_string.push_back(0); |
|---|
| 528 | std::copy(test_info<UChar32>::result_string().begin(), test_info<UChar32>::result_string().end(), std::back_inserter(result_string)); |
|---|
| 529 | #endif |
|---|
| 530 | string_type result; |
|---|
| 531 | |
|---|
| 532 | boost::u32regex_replace(unnecessary_fix::back_inserter(result), search_text.begin(), search_text.end(), r, &*format_string.begin(), opts); |
|---|
| 533 | if(result != result_string) |
|---|
| 534 | { |
|---|
| 535 | BOOST_REGEX_TEST_ERROR("regex_replace generated an incorrect string result", UChar32); |
|---|
| 536 | } |
|---|
| 537 | // |
|---|
| 538 | // Mixed mode character encoding: |
|---|
| 539 | // |
|---|
| 540 | if(0 == *test_locale::c_str()) |
|---|
| 541 | { |
|---|
| 542 | // |
|---|
| 543 | // Now try UTF-16 construction: |
|---|
| 544 | // |
|---|
| 545 | typedef boost::u32_to_u16_iterator<std::vector<UChar32>::const_iterator> u16_conv; |
|---|
| 546 | std::vector<UChar> expression16, text16, format16, result16, found16; |
|---|
| 547 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS |
|---|
| 548 | expression16.assign(u16_conv(expression.begin()), u16_conv(expression.end())); |
|---|
| 549 | text16.assign(u16_conv(search_text.begin()), u16_conv(search_text.end())); |
|---|
| 550 | format16.assign(u16_conv(format_string.begin()), u16_conv(format_string.end())); |
|---|
| 551 | result16.assign(u16_conv(result_string.begin()), u16_conv(result_string.end())); |
|---|
| 552 | #else |
|---|
| 553 | std::copy(u16_conv(expression.begin()), u16_conv(expression.end()), std::back_inserter(expression16)); |
|---|
| 554 | std::copy(u16_conv(search_text.begin()), u16_conv(search_text.end()), std::back_inserter(text16)); |
|---|
| 555 | std::copy(u16_conv(format_string.begin()), u16_conv(format_string.end()), std::back_inserter(format16)); |
|---|
| 556 | std::copy(u16_conv(result_string.begin()), u16_conv(result_string.end()), std::back_inserter(result16)); |
|---|
| 557 | #endif |
|---|
| 558 | r = boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options); |
|---|
| 559 | boost::u32regex_replace(unnecessary_fix::back_inserter(found16), text16.begin(), text16.end(), r, &*format16.begin(), opts); |
|---|
| 560 | if(result16 != found16) |
|---|
| 561 | { |
|---|
| 562 | BOOST_REGEX_TEST_ERROR("u32regex_replace with UTF-16 string returned incorrect result", UChar32); |
|---|
| 563 | } |
|---|
| 564 | // |
|---|
| 565 | // Now with UnicodeString: |
|---|
| 566 | // |
|---|
| 567 | UnicodeString expression16u, text16u, format16u, result16u, found16u; |
|---|
| 568 | if(expression16.size()) |
|---|
| 569 | expression16u.setTo(&*expression16.begin(), expression16.size()); |
|---|
| 570 | if(text16.size()) |
|---|
| 571 | text16u.setTo(&*text16.begin(), text16.size()); |
|---|
| 572 | format16u.setTo(&*format16.begin(), format16.size()-1); |
|---|
| 573 | if(result16.size()) |
|---|
| 574 | result16u.setTo(&*result16.begin(), result16.size()); |
|---|
| 575 | r = boost::make_u32regex(expression16.begin(), expression16.end(), syntax_options); |
|---|
| 576 | found16u = boost::u32regex_replace(text16u, r, format16u, opts); |
|---|
| 577 | if(result16u != found16u) |
|---|
| 578 | { |
|---|
| 579 | BOOST_REGEX_TEST_ERROR("u32regex_replace with UTF-16 string returned incorrect result", UChar32); |
|---|
| 580 | } |
|---|
| 581 | |
|---|
| 582 | // |
|---|
| 583 | // Now try UTF-8 construction: |
|---|
| 584 | // |
|---|
| 585 | typedef boost::u32_to_u8_iterator<std::vector<UChar32>::const_iterator, unsigned char> u8_conv; |
|---|
| 586 | std::vector<char> expression8, text8, format8, result8, found8; |
|---|
| 587 | #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS |
|---|
| 588 | expression8.assign(u8_conv(expression.begin()), u8_conv(expression.end())); |
|---|
| 589 | text8.assign(u8_conv(search_text.begin()), u8_conv(search_text.end())); |
|---|
| 590 | format8.assign(u8_conv(format_string.begin()), u8_conv(format_string.end())); |
|---|
| 591 | result8.assign(u8_conv(result_string.begin()), u8_conv(result_string.end())); |
|---|
| 592 | #else |
|---|
| 593 | std::copy(u8_conv(expression.begin()), u8_conv(expression.end()), std::back_inserter(expression8)); |
|---|
| 594 | std::copy(u8_conv(search_text.begin()), u8_conv(search_text.end()), std::back_inserter(text8)); |
|---|
| 595 | std::copy(u8_conv(format_string.begin()), u8_conv(format_string.end()), std::back_inserter(format8)); |
|---|
| 596 | std::copy(u8_conv(result_string.begin()), u8_conv(result_string.end()), std::back_inserter(result8)); |
|---|
| 597 | #endif |
|---|
| 598 | r = boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options); |
|---|
| 599 | boost::u32regex_replace(unnecessary_fix::back_inserter(found8), text8.begin(), text8.end(), r, &*format8.begin(), opts); |
|---|
| 600 | if(result8 != found8) |
|---|
| 601 | { |
|---|
| 602 | BOOST_REGEX_TEST_ERROR("u32regex_replace with UTF-8 string returned incorrect result", UChar32); |
|---|
| 603 | } |
|---|
| 604 | // |
|---|
| 605 | // Now with std::string and UTF-8: |
|---|
| 606 | // |
|---|
| 607 | std::string expression8s, text8s, format8s, result8s, found8s; |
|---|
| 608 | if(expression8.size()) |
|---|
| 609 | expression8s.assign(&*expression8.begin(), expression8.size()); |
|---|
| 610 | if(text8.size()) |
|---|
| 611 | text8s.assign(&*text8.begin(), text8.size()); |
|---|
| 612 | format8s.assign(&*format8.begin(), format8.size()-1); |
|---|
| 613 | if(result8.size()) |
|---|
| 614 | result8s.assign(&*result8.begin(), result8.size()); |
|---|
| 615 | r = boost::make_u32regex(expression8.begin(), expression8.end(), syntax_options); |
|---|
| 616 | found8s = boost::u32regex_replace(text8s, r, format8s, opts); |
|---|
| 617 | if(result8s != found8s) |
|---|
| 618 | { |
|---|
| 619 | BOOST_REGEX_TEST_ERROR("u32regex_replace with UTF-8 string returned incorrect result", UChar32); |
|---|
| 620 | } |
|---|
| 621 | } |
|---|
| 622 | } |
|---|
| 623 | catch(const boost::bad_expression& e) |
|---|
| 624 | { |
|---|
| 625 | BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done: " << e.what(), UChar32); |
|---|
| 626 | } |
|---|
| 627 | catch(const std::runtime_error& r) |
|---|
| 628 | { |
|---|
| 629 | BOOST_REGEX_TEST_ERROR("Received an unexpected std::runtime_error: " << r.what(), UChar32); |
|---|
| 630 | } |
|---|
| 631 | catch(const std::exception& r) |
|---|
| 632 | { |
|---|
| 633 | BOOST_REGEX_TEST_ERROR("Received an unexpected std::exception: " << r.what(), UChar32); |
|---|
| 634 | } |
|---|
| 635 | catch(...) |
|---|
| 636 | { |
|---|
| 637 | BOOST_REGEX_TEST_ERROR("Received an unexpected exception of unknown type", UChar32); |
|---|
| 638 | } |
|---|
| 639 | } |
|---|
| 640 | |
|---|
| 641 | #else |
|---|
| 642 | |
|---|
| 643 | #include "test.hpp" |
|---|
| 644 | |
|---|
| 645 | void test_icu(const wchar_t&, const test_regex_search_tag&){} |
|---|
| 646 | void test_icu(const wchar_t&, const test_invalid_regex_tag&){} |
|---|
| 647 | void test_icu(const wchar_t&, const test_regex_replace_tag&){} |
|---|
| 648 | |
|---|
| 649 | #endif |
|---|
| 650 | |
|---|