| 1 | /* |
|---|
| 2 | * |
|---|
| 3 | * Copyright (c) 2002 |
|---|
| 4 | * John Maddock |
|---|
| 5 | * |
|---|
| 6 | * Use, modification and distribution are subject to the |
|---|
| 7 | * Boost Software License, Version 1.0. (See accompanying file |
|---|
| 8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
|---|
| 9 | * |
|---|
| 10 | */ |
|---|
| 11 | |
|---|
| 12 | #include <iostream> |
|---|
| 13 | #include <fstream> |
|---|
| 14 | #include <iterator> |
|---|
| 15 | #include <cassert> |
|---|
| 16 | #include <boost/test/execution_monitor.hpp> |
|---|
| 17 | #include "regex_comparison.hpp" |
|---|
| 18 | |
|---|
| 19 | |
|---|
| 20 | void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase) |
|---|
| 21 | { |
|---|
| 22 | double time; |
|---|
| 23 | results r(re, description); |
|---|
| 24 | |
|---|
| 25 | std::cout << "Testing: \"" << re << "\" against \"" << description << "\"" << std::endl; |
|---|
| 26 | |
|---|
| 27 | #ifdef BOOST_HAS_GRETA |
|---|
| 28 | if(time_greta == true) |
|---|
| 29 | { |
|---|
| 30 | time = g::time_match(re, text, icase); |
|---|
| 31 | r.greta_time = time; |
|---|
| 32 | std::cout << "\tGRETA regex: " << time << "s\n"; |
|---|
| 33 | } |
|---|
| 34 | if(time_safe_greta == true) |
|---|
| 35 | { |
|---|
| 36 | time = gs::time_match(re, text, icase); |
|---|
| 37 | r.safe_greta_time = time; |
|---|
| 38 | std::cout << "\tSafe GRETA regex: " << time << "s\n"; |
|---|
| 39 | } |
|---|
| 40 | #endif |
|---|
| 41 | if(time_boost == true) |
|---|
| 42 | { |
|---|
| 43 | time = b::time_match(re, text, icase); |
|---|
| 44 | r.boost_time = time; |
|---|
| 45 | std::cout << "\tBoost regex: " << time << "s\n"; |
|---|
| 46 | } |
|---|
| 47 | if(time_localised_boost == true) |
|---|
| 48 | { |
|---|
| 49 | time = bl::time_match(re, text, icase); |
|---|
| 50 | r.localised_boost_time = time; |
|---|
| 51 | std::cout << "\tBoost regex (C++ locale): " << time << "s\n"; |
|---|
| 52 | } |
|---|
| 53 | #ifdef BOOST_HAS_POSIX |
|---|
| 54 | if(time_posix == true) |
|---|
| 55 | { |
|---|
| 56 | time = posix::time_match(re, text, icase); |
|---|
| 57 | r.posix_time = time; |
|---|
| 58 | std::cout << "\tPOSIX regex: " << time << "s\n"; |
|---|
| 59 | } |
|---|
| 60 | #endif |
|---|
| 61 | #ifdef BOOST_HAS_PCRE |
|---|
| 62 | if(time_pcre == true) |
|---|
| 63 | { |
|---|
| 64 | time = pcr::time_match(re, text, icase); |
|---|
| 65 | r.pcre_time = time; |
|---|
| 66 | std::cout << "\tPCRE regex: " << time << "s\n"; |
|---|
| 67 | } |
|---|
| 68 | #endif |
|---|
| 69 | #ifdef BOOST_HAS_XPRESSIVE |
|---|
| 70 | if(time_xpressive == true) |
|---|
| 71 | { |
|---|
| 72 | time = dxpr::time_match(re, text, icase); |
|---|
| 73 | r.xpressive_time = time; |
|---|
| 74 | std::cout << "\txpressive regex: " << time << "s\n"; |
|---|
| 75 | } |
|---|
| 76 | #endif |
|---|
| 77 | r.finalise(); |
|---|
| 78 | result_list.push_back(r); |
|---|
| 79 | } |
|---|
| 80 | |
|---|
| 81 | void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase) |
|---|
| 82 | { |
|---|
| 83 | std::cout << "Testing: " << re << std::endl; |
|---|
| 84 | |
|---|
| 85 | double time; |
|---|
| 86 | results r(re, description); |
|---|
| 87 | |
|---|
| 88 | #ifdef BOOST_HAS_GRETA |
|---|
| 89 | if(time_greta == true) |
|---|
| 90 | { |
|---|
| 91 | time = g::time_find_all(re, text, icase); |
|---|
| 92 | r.greta_time = time; |
|---|
| 93 | std::cout << "\tGRETA regex: " << time << "s\n"; |
|---|
| 94 | } |
|---|
| 95 | if(time_safe_greta == true) |
|---|
| 96 | { |
|---|
| 97 | time = gs::time_find_all(re, text, icase); |
|---|
| 98 | r.safe_greta_time = time; |
|---|
| 99 | std::cout << "\tSafe GRETA regex: " << time << "s\n"; |
|---|
| 100 | } |
|---|
| 101 | #endif |
|---|
| 102 | if(time_boost == true) |
|---|
| 103 | { |
|---|
| 104 | time = b::time_find_all(re, text, icase); |
|---|
| 105 | r.boost_time = time; |
|---|
| 106 | std::cout << "\tBoost regex: " << time << "s\n"; |
|---|
| 107 | } |
|---|
| 108 | if(time_localised_boost == true) |
|---|
| 109 | { |
|---|
| 110 | time = bl::time_find_all(re, text, icase); |
|---|
| 111 | r.localised_boost_time = time; |
|---|
| 112 | std::cout << "\tBoost regex (C++ locale): " << time << "s\n"; |
|---|
| 113 | } |
|---|
| 114 | #ifdef BOOST_HAS_POSIX |
|---|
| 115 | if(time_posix == true) |
|---|
| 116 | { |
|---|
| 117 | time = posix::time_find_all(re, text, icase); |
|---|
| 118 | r.posix_time = time; |
|---|
| 119 | std::cout << "\tPOSIX regex: " << time << "s\n"; |
|---|
| 120 | } |
|---|
| 121 | #endif |
|---|
| 122 | #ifdef BOOST_HAS_PCRE |
|---|
| 123 | if(time_pcre == true) |
|---|
| 124 | { |
|---|
| 125 | time = pcr::time_find_all(re, text, icase); |
|---|
| 126 | r.pcre_time = time; |
|---|
| 127 | std::cout << "\tPCRE regex: " << time << "s\n"; |
|---|
| 128 | } |
|---|
| 129 | #endif |
|---|
| 130 | #ifdef BOOST_HAS_XPRESSIVE |
|---|
| 131 | if(time_xpressive == true) |
|---|
| 132 | { |
|---|
| 133 | time = dxpr::time_find_all(re, text, icase); |
|---|
| 134 | r.xpressive_time = time; |
|---|
| 135 | std::cout << "\txpressive regex: " << time << "s\n"; |
|---|
| 136 | } |
|---|
| 137 | #endif |
|---|
| 138 | r.finalise(); |
|---|
| 139 | result_list.push_back(r); |
|---|
| 140 | } |
|---|
| 141 | |
|---|
| 142 | int cpp_main(int argc, char * argv[]) |
|---|
| 143 | { |
|---|
| 144 | // start by processing the command line args: |
|---|
| 145 | if(argc < 2) |
|---|
| 146 | return show_usage(); |
|---|
| 147 | int result = 0; |
|---|
| 148 | for(int c = 1; c < argc; ++c) |
|---|
| 149 | { |
|---|
| 150 | result += handle_argument(argv[c]); |
|---|
| 151 | } |
|---|
| 152 | if(result) |
|---|
| 153 | return result; |
|---|
| 154 | |
|---|
| 155 | if(test_matches) |
|---|
| 156 | { |
|---|
| 157 | // start with a simple test, this is basically a measure of the minimal overhead |
|---|
| 158 | // involved in calling a regex matcher: |
|---|
| 159 | test_match("abc", "abc"); |
|---|
| 160 | // these are from the regex docs: |
|---|
| 161 | test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string"); |
|---|
| 162 | test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456"); |
|---|
| 163 | // these are from http://www.regxlib.com/ |
|---|
| 164 | test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john@johnmaddock.co.uk"); |
|---|
| 165 | test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu"); |
|---|
| 166 | test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv"); |
|---|
| 167 | test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ"); |
|---|
| 168 | test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA"); |
|---|
| 169 | test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ"); |
|---|
| 170 | test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001"); |
|---|
| 171 | test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001"); |
|---|
| 172 | test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123"); |
|---|
| 173 | test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159"); |
|---|
| 174 | test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159"); |
|---|
| 175 | } |
|---|
| 176 | output_html_results(true, "%short_matches%"); |
|---|
| 177 | |
|---|
| 178 | std::string file_contents; |
|---|
| 179 | |
|---|
| 180 | if(test_code) |
|---|
| 181 | { |
|---|
| 182 | load_file(file_contents, "../../../boost/crc.hpp"); |
|---|
| 183 | |
|---|
| 184 | const char* highlight_expression = // preprocessor directives: index 1 |
|---|
| 185 | "(^[ \t]*#(?:[^\\\\\\n]|\\\\[^\\n_[:punct:][:alnum:]]*[\\n[:punct:][:word:]])*)|" |
|---|
| 186 | // comment: index 2 |
|---|
| 187 | "(//[^\\n]*|/\\*.*?\\*/)|" |
|---|
| 188 | // literals: index 3 |
|---|
| 189 | "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|" |
|---|
| 190 | // string literals: index 4 |
|---|
| 191 | "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|" |
|---|
| 192 | // keywords: index 5 |
|---|
| 193 | "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import" |
|---|
| 194 | "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall" |
|---|
| 195 | "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool" |
|---|
| 196 | "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete" |
|---|
| 197 | "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto" |
|---|
| 198 | "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected" |
|---|
| 199 | "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast" |
|---|
| 200 | "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned" |
|---|
| 201 | "|using|virtual|void|volatile|wchar_t|while)\\>" |
|---|
| 202 | ; |
|---|
| 203 | |
|---|
| 204 | const char* class_expression = "^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" |
|---|
| 205 | "(class|struct)[[:space:]]*(\\<\\w+\\>([ \t]*\\([^)]*\\))?" |
|---|
| 206 | "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" |
|---|
| 207 | "(\\{|:[^;\\{()]*\\{)"; |
|---|
| 208 | |
|---|
| 209 | const char* include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"[^\"]+\"|<[^>]+>)"; |
|---|
| 210 | const char* boost_include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|<boost/[^>]+>)"; |
|---|
| 211 | |
|---|
| 212 | |
|---|
| 213 | test_find_all(class_expression, file_contents); |
|---|
| 214 | test_find_all(highlight_expression, file_contents); |
|---|
| 215 | test_find_all(include_expression, file_contents); |
|---|
| 216 | test_find_all(boost_include_expression, file_contents); |
|---|
| 217 | } |
|---|
| 218 | output_html_results(false, "%code_search%"); |
|---|
| 219 | |
|---|
| 220 | if(test_html) |
|---|
| 221 | { |
|---|
| 222 | load_file(file_contents, "../../../libs/libraries.htm"); |
|---|
| 223 | test_find_all("beman|john|dave", file_contents, true); |
|---|
| 224 | test_find_all("<p>.*?</p>", file_contents, true); |
|---|
| 225 | test_find_all("<a[^>]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true); |
|---|
| 226 | test_find_all("<h[12345678][^>]*>.*?</h[12345678]>", file_contents, true); |
|---|
| 227 | test_find_all("<img[^>]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true); |
|---|
| 228 | test_find_all("<font[^>]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?</font>", file_contents, true); |
|---|
| 229 | } |
|---|
| 230 | output_html_results(false, "%html_search%"); |
|---|
| 231 | |
|---|
| 232 | if(test_short_twain) |
|---|
| 233 | { |
|---|
| 234 | load_file(file_contents, "short_twain.txt"); |
|---|
| 235 | |
|---|
| 236 | test_find_all("Twain", file_contents); |
|---|
| 237 | test_find_all("Huck[[:alpha:]]+", file_contents); |
|---|
| 238 | test_find_all("[[:alpha:]]+ing", file_contents); |
|---|
| 239 | test_find_all("^[^\n]*?Twain", file_contents); |
|---|
| 240 | test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); |
|---|
| 241 | test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents); |
|---|
| 242 | } |
|---|
| 243 | output_html_results(false, "%short_twain_search%"); |
|---|
| 244 | |
|---|
| 245 | if(test_long_twain) |
|---|
| 246 | { |
|---|
| 247 | load_file(file_contents, "mtent13.txt"); |
|---|
| 248 | |
|---|
| 249 | test_find_all("Twain", file_contents); |
|---|
| 250 | test_find_all("Huck[[:alpha:]]+", file_contents); |
|---|
| 251 | test_find_all("[[:alpha:]]+ing", file_contents); |
|---|
| 252 | test_find_all("^[^\n]*?Twain", file_contents); |
|---|
| 253 | test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); |
|---|
| 254 | time_posix = false; |
|---|
| 255 | test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents); |
|---|
| 256 | time_posix = true; |
|---|
| 257 | } |
|---|
| 258 | output_html_results(false, "%long_twain_search%"); |
|---|
| 259 | |
|---|
| 260 | output_final_html(); |
|---|
| 261 | return 0; |
|---|
| 262 | } |
|---|
| 263 | |
|---|
| 264 | |
|---|