| 1 | /* |
|---|
| 2 | * |
|---|
| 3 | * Copyright (c) 2004 |
|---|
| 4 | * John Maddock |
|---|
| 5 | * |
|---|
| 6 | * Use, modification and distribution are subject to the |
|---|
| 7 | * Boost Software License, Version 1.0. (See accompanying file |
|---|
| 8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
|---|
| 9 | * |
|---|
| 10 | */ |
|---|
| 11 | |
|---|
| 12 | /* |
|---|
| 13 | * LOCATION: see http://www.boost.org for most recent version. |
|---|
| 14 | * FILE mfc_example.cpp |
|---|
| 15 | * VERSION see <boost/version.hpp> |
|---|
| 16 | * DESCRIPTION: examples of using Boost.Regex with ICU Unicode string types. |
|---|
| 17 | */ |
|---|
| 18 | |
|---|
| 19 | #include <boost/regex/config.hpp> |
|---|
| 20 | |
|---|
| 21 | #ifdef BOOST_HAS_ICU |
|---|
| 22 | |
|---|
#include <boost/regex/icu.hpp>
#include <assert.h>
#include <iostream>
#include <stdexcept>
#include <string>
|---|
| 26 | |
|---|
| 27 | // |
|---|
| 28 | // Find out if *password* meets our password requirements, |
|---|
| 29 | // as defined by the regular expression *requirements*. |
|---|
| 30 | // |
|---|
| 31 | bool is_valid_password(const UnicodeString& password, const UnicodeString& requirements) |
|---|
| 32 | { |
|---|
| 33 | return boost::u32regex_match(password, boost::make_u32regex(requirements)); |
|---|
| 34 | } |
|---|
| 35 | |
|---|
| 36 | // |
|---|
| 37 | // Extract filename part of a path from a UTF-8 encoded std::string and return the result |
|---|
| 38 | // as another std::string: |
|---|
| 39 | // |
|---|
| 40 | std::string get_filename(const std::string& path) |
|---|
| 41 | { |
|---|
| 42 | boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)"); |
|---|
| 43 | boost::smatch what; |
|---|
| 44 | if(boost::u32regex_match(path, what, r)) |
|---|
| 45 | { |
|---|
| 46 | // extract $1 as a CString: |
|---|
| 47 | return what.str(1); |
|---|
| 48 | } |
|---|
| 49 | else |
|---|
| 50 | { |
|---|
| 51 | throw std::runtime_error("Invalid pathname"); |
|---|
| 52 | } |
|---|
| 53 | } |
|---|
| 54 | |
|---|
| 55 | UnicodeString extract_greek(const UnicodeString& text) |
|---|
| 56 | { |
|---|
| 57 | // searches through some UTF-16 encoded text for a block encoded in Greek, |
|---|
| 58 | // this expression is imperfect, but the best we can do for now - searching |
|---|
| 59 | // for specific scripts is actually pretty hard to do right. |
|---|
| 60 | boost::u32regex r = boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*"); |
|---|
| 61 | boost::u16match what; |
|---|
| 62 | if(boost::u32regex_search(text, what, r)) |
|---|
| 63 | { |
|---|
| 64 | // extract $0 as a CString: |
|---|
| 65 | return UnicodeString(what[0].first, what.length(0)); |
|---|
| 66 | } |
|---|
| 67 | else |
|---|
| 68 | { |
|---|
| 69 | throw std::runtime_error("No Greek found!"); |
|---|
| 70 | } |
|---|
| 71 | } |
|---|
| 72 | |
|---|
| 73 | void enumerate_currencies(const std::string& text) |
|---|
| 74 | { |
|---|
| 75 | // enumerate and print all the currency symbols, along |
|---|
| 76 | // with any associated numeric values: |
|---|
| 77 | const char* re = |
|---|
| 78 | "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?" |
|---|
| 79 | "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?" |
|---|
| 80 | "(?(1)" |
|---|
| 81 | "|(?(2)" |
|---|
| 82 | "[[:Cf:][:Cc:][:Z*:]]*" |
|---|
| 83 | ")" |
|---|
| 84 | "[[:Sc:]]" |
|---|
| 85 | ")"; |
|---|
| 86 | boost::u32regex r = boost::make_u32regex(re); |
|---|
| 87 | boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j; |
|---|
| 88 | while(i != j) |
|---|
| 89 | { |
|---|
| 90 | std::cout << (*i)[0] << std::endl; |
|---|
| 91 | ++i; |
|---|
| 92 | } |
|---|
| 93 | } |
|---|
| 94 | |
|---|
| 95 | void enumerate_currencies2(const std::string& text) |
|---|
| 96 | { |
|---|
| 97 | // enumerate and print all the currency symbols, along |
|---|
| 98 | // with any associated numeric values: |
|---|
| 99 | const char* re = |
|---|
| 100 | "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?" |
|---|
| 101 | "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?" |
|---|
| 102 | "(?(1)" |
|---|
| 103 | "|(?(2)" |
|---|
| 104 | "[[:Cf:][:Cc:][:Z*:]]*" |
|---|
| 105 | ")" |
|---|
| 106 | "[[:Sc:]]" |
|---|
| 107 | ")"; |
|---|
| 108 | boost::u32regex r = boost::make_u32regex(re); |
|---|
| 109 | boost::u32regex_token_iterator<std::string::const_iterator> |
|---|
| 110 | i(boost::make_u32regex_token_iterator(text, r, 1)), j; |
|---|
| 111 | while(i != j) |
|---|
| 112 | { |
|---|
| 113 | std::cout << *i << std::endl; |
|---|
| 114 | ++i; |
|---|
| 115 | } |
|---|
| 116 | } |
|---|
| 117 | |
|---|
| 118 | |
|---|
| 119 | // |
|---|
| 120 | // Take a credit card number as a string of digits, |
|---|
| 121 | // and reformat it as a human readable string with "-" |
|---|
| 122 | // separating each group of four digit;, |
|---|
| 123 | // note that we're mixing a UTF-32 regex, with a UTF-16 |
|---|
| 124 | // string and a UTF-8 format specifier, and it still all |
|---|
| 125 | // just works: |
|---|
| 126 | // |
|---|
| 127 | const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z"); |
|---|
| 128 | const char* human_format = "$1-$2-$3-$4"; |
|---|
| 129 | |
|---|
| 130 | UnicodeString human_readable_card_number(const UnicodeString& s) |
|---|
| 131 | { |
|---|
| 132 | return boost::u32regex_replace(s, e, human_format); |
|---|
| 133 | } |
|---|
| 134 | |
|---|
| 135 | |
|---|
| 136 | int main() |
|---|
| 137 | { |
|---|
| 138 | // password checks using u32regex_match: |
|---|
| 139 | UnicodeString pwd = "abcDEF---"; |
|---|
| 140 | UnicodeString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}"; |
|---|
| 141 | bool b = is_valid_password(pwd, pwd_check); |
|---|
| 142 | assert(b); |
|---|
| 143 | pwd = "abcD-"; |
|---|
| 144 | b = is_valid_password(pwd, pwd_check); |
|---|
| 145 | assert(!b); |
|---|
| 146 | // filename extraction with u32regex_match: |
|---|
| 147 | std::string file = "abc.hpp"; |
|---|
| 148 | file = get_filename(file); |
|---|
| 149 | assert(file == "abc.hpp"); |
|---|
| 150 | file = "c:\\a\\b\\c\\d.h"; |
|---|
| 151 | file = get_filename(file); |
|---|
| 152 | assert(file == "d.h"); |
|---|
| 153 | |
|---|
| 154 | // Greek text extraction with u32regex_search: |
|---|
| 155 | UnicodeString text = L"Some where in \x0391\x039D\x0395\x0398\x0391 2004"; |
|---|
| 156 | UnicodeString greek = extract_greek(text); |
|---|
| 157 | assert(greek == L"\x0391\x039D\x0395\x0398\x0391 2004"); |
|---|
| 158 | |
|---|
| 159 | // extract currency symbols with associated value, use iterator interface: |
|---|
| 160 | std::string text2 = " $100.23 or \xC2\xA3""198.12 "; // \xC2\xA3 is the £ sign encoded in UTF-8 |
|---|
| 161 | enumerate_currencies(text2); |
|---|
| 162 | enumerate_currencies2(text2); |
|---|
| 163 | |
|---|
| 164 | UnicodeString credit_card_number = "1234567887654321"; |
|---|
| 165 | credit_card_number = human_readable_card_number(credit_card_number); |
|---|
| 166 | assert(credit_card_number == "1234-5678-8765-4321"); |
|---|
| 167 | return 0; |
|---|
| 168 | } |
|---|
| 169 | |
|---|
| 170 | #else |
|---|
| 171 | |
|---|
| 172 | #include <iostream> |
|---|
| 173 | |
|---|
// Built without BOOST_HAS_ICU: print a notice and exit successfully.
int main()
{
    const char* const notice = "<NOTE>ICU support not enabled, feature unavailable</NOTE>";
    std::cout << notice;
    return 0;
}
|---|
| 179 | |
|---|
| 180 | |
|---|
| 181 | #endif |
|---|
| 182 | |
|---|