| 1 | #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ | 
|---|
| 2 | #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__ | 
|---|
| 3 |  | 
|---|
| 4 | /* Copyright (c) 2004-2005 CrystalClear Software, Inc. | 
|---|
| 5 |  * Use, modification and distribution is subject to the  | 
|---|
| 6 |  * Boost Software License, Version 1.0. (See accompanying | 
|---|
| 7 |  * file LICENSE-1.0 or http://www.boost.org/LICENSE-1.0) | 
|---|
| 8 |  * Author: Jeff Garland, Bart Garst | 
|---|
| 9 |  * $Date: 2005/07/01 02:58:57 $ | 
|---|
| 10 |  */ | 
|---|
| 11 |  | 
|---|
| 12 |  | 
|---|
| 13 | #include "boost/lexical_cast.hpp" //error without? | 
|---|
| 14 | #include "boost/algorithm/string/case_conv.hpp"  | 
|---|
| 15 | #include <map> | 
|---|
| 16 | #include <string> | 
|---|
| 17 | #include <vector> | 
|---|
| 18 | #include <algorithm> | 
|---|
| 19 |  | 
|---|
| 20 | namespace boost { namespace date_time { | 
|---|
| 21 |  | 
|---|
| 22 |  | 
|---|
| 23 | template<typename charT> | 
|---|
| 24 | struct parse_match_result | 
|---|
| 25 | { | 
|---|
| 26 |   parse_match_result() : | 
|---|
| 27 |     match_depth(0), | 
|---|
| 28 |     current_match(-1)// -1 is match_not-found value | 
|---|
| 29 |   {} | 
|---|
| 30 |   typedef std::basic_string<charT> string_type; | 
|---|
| 31 |   string_type remaining() const | 
|---|
| 32 |   { | 
|---|
| 33 |     if (match_depth == cache.size()) { | 
|---|
| 34 |       return string_type(); | 
|---|
| 35 |     } | 
|---|
| 36 |     if (current_match == -1) { | 
|---|
| 37 |       return cache; | 
|---|
| 38 |     } | 
|---|
| 39 |     //some of the cache was used return the rest | 
|---|
| 40 |     return string_type(cache, match_depth);  | 
|---|
| 41 |   } | 
|---|
| 42 |   charT last_char() const | 
|---|
| 43 |   { | 
|---|
| 44 |     return cache[cache.size()-1]; | 
|---|
| 45 |   } | 
|---|
| 46 |   //! Returns true if more characters were parsed than was necessary | 
|---|
| 47 |   /*! Should be used in conjunction with last_char()  | 
|---|
| 48 |    * to get the remaining character. */ | 
|---|
| 49 |   bool has_remaining() const | 
|---|
| 50 |   { | 
|---|
| 51 |     return (cache.size() > match_depth); | 
|---|
| 52 |   } | 
|---|
| 53 |    | 
|---|
| 54 |   // cache will hold characters that have been read from the stream | 
|---|
| 55 |   string_type cache; | 
|---|
| 56 |   unsigned short match_depth; | 
|---|
| 57 |   short current_match; | 
|---|
| 58 |   static const short PARSE_ERROR; | 
|---|
| 59 | }; | 
|---|
| 60 | template<typename charT> | 
|---|
| 61 | const short parse_match_result<charT>::PARSE_ERROR = -1; | 
|---|
| 62 |  | 
|---|
| 63 |   //for debug -- really only char streams... | 
|---|
| 64 | template<typename charT> | 
|---|
| 65 | std::basic_ostream<charT>& | 
|---|
| 66 | operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr) | 
|---|
| 67 | { | 
|---|
| 68 |   os << "cm: " << mr.current_match  | 
|---|
| 69 |      << " C: '" << mr.cache  | 
|---|
| 70 |      << "' md: " << mr.match_depth | 
|---|
| 71 |      << " R: " << mr.remaining(); | 
|---|
| 72 |   return os; | 
|---|
| 73 | } | 
|---|
| 74 |  | 
|---|
| 75 |  | 
|---|
| 76 |  | 
|---|
| 77 | //! Recursive data structure to allow efficient parsing of various strings | 
|---|
| 78 | /*! This class provides a quick lookup by building what amounts to a | 
|---|
| 79 |  *  tree data structure.  It also features a match function which can | 
|---|
| 80 |  *  can handle nasty input interators by caching values as it recurses | 
|---|
| 81 |  *  the tree so that it can backtrack as needed. | 
|---|
| 82 |  */ | 
|---|
| 83 | template<typename charT> | 
|---|
| 84 | struct string_parse_tree | 
|---|
| 85 | { | 
|---|
| 86 |   typedef std::multimap<charT, string_parse_tree> ptree_coll; | 
|---|
| 87 |   typedef typename ptree_coll::value_type value_type; | 
|---|
| 88 |   typedef typename ptree_coll::iterator iterator; | 
|---|
| 89 |   typedef typename ptree_coll::const_iterator const_iterator; | 
|---|
| 90 |   typedef std::basic_string<charT> string_type; | 
|---|
| 91 |   typedef std::vector<std::basic_string<charT> > collection_type; | 
|---|
| 92 |   typedef parse_match_result<charT> parse_match_result_type; | 
|---|
| 93 |   | 
|---|
| 94 |   /*! Parameter "starting_point" desingates where the numbering begins.  | 
|---|
| 95 |    * A starting_point of zero will start the numbering at zero  | 
|---|
| 96 |    * (Sun=0, Mon=1, ...) were a starting_point of one starts the  | 
|---|
| 97 |    * numbering at one (Jan=1, Feb=2, ...). The default is zero,  | 
|---|
| 98 |    * negative vaules are not allowed */ | 
|---|
| 99 |   string_parse_tree(collection_type names, unsigned int starting_point=0) | 
|---|
| 100 |   { | 
|---|
| 101 |     // iterate thru all the elements and build the tree | 
|---|
| 102 |     unsigned short index = 0; | 
|---|
| 103 |     while (index != names.size() ) { | 
|---|
| 104 |       string_type s = boost::algorithm::to_lower_copy(names[index]); | 
|---|
| 105 |       insert(s, static_cast<unsigned short>(index + starting_point)); | 
|---|
| 106 |       index++; | 
|---|
| 107 |     } | 
|---|
| 108 |     //set the last tree node = index+1  indicating a value | 
|---|
| 109 |     index++; | 
|---|
| 110 |   } | 
|---|
| 111 |  | 
|---|
| 112 |  | 
|---|
| 113 |   string_parse_tree(short value = -1) : | 
|---|
| 114 |     m_value(value) | 
|---|
| 115 |   {} | 
|---|
| 116 |   ptree_coll m_next_chars; | 
|---|
| 117 |   short m_value; | 
|---|
| 118 |  | 
|---|
| 119 |   void insert(const string_type& s, unsigned short value) | 
|---|
| 120 |   { | 
|---|
| 121 |     unsigned int i = 0; | 
|---|
| 122 |     iterator ti; | 
|---|
| 123 |     while(i < s.size()) { | 
|---|
| 124 |       if (i==0) { | 
|---|
| 125 |         if (i == (s.size()-1)) { | 
|---|
| 126 |           ti = m_next_chars.insert(value_type(s[i],  | 
|---|
| 127 |                                               string_parse_tree<charT>(value))); | 
|---|
| 128 |         } | 
|---|
| 129 |         else { | 
|---|
| 130 |           ti = m_next_chars.insert(value_type(s[i],  | 
|---|
| 131 |                                               string_parse_tree<charT>())); | 
|---|
| 132 |         } | 
|---|
| 133 |       } | 
|---|
| 134 |       else { | 
|---|
| 135 |         if (i == (s.size()-1)) { | 
|---|
| 136 |           ti = ti->second.m_next_chars.insert(value_type(s[i],  | 
|---|
| 137 |                                                          string_parse_tree<charT>(value))); | 
|---|
| 138 |         } | 
|---|
| 139 |          | 
|---|
| 140 |         else { | 
|---|
| 141 |           ti = ti->second.m_next_chars.insert(value_type(s[i],  | 
|---|
| 142 |                                                          string_parse_tree<charT>())); | 
|---|
| 143 |         } | 
|---|
| 144 |        | 
|---|
| 145 |       }  | 
|---|
| 146 |       i++; | 
|---|
| 147 |     } | 
|---|
| 148 |   } | 
|---|
| 149 |   | 
|---|
| 150 |    | 
|---|
| 151 |   //! Recursive function that finds a matching string in the tree. | 
|---|
| 152 |   /*! Must check match_results::has_remaining() after match() is  | 
|---|
| 153 |    * called. This is required so the user can determine if  | 
|---|
| 154 |    * stream iterator is already pointing to the expected  | 
|---|
| 155 |    * character or not (match() might advance sitr to next char in stream). | 
|---|
| 156 |    * | 
|---|
| 157 |    * A parse_match_result that has been returned from a failed match  | 
|---|
| 158 |    * attempt can be sent in to the match function of a different  | 
|---|
| 159 |    * string_parse_tree to attempt a match there. Use the iterators  | 
|---|
| 160 |    * for the partially consumed stream, the parse_match_result object,  | 
|---|
| 161 |    * and '0' for the level parameter. */ | 
|---|
| 162 |   short | 
|---|
| 163 |   match(std::istreambuf_iterator<charT>& sitr,  | 
|---|
| 164 |         std::istreambuf_iterator<charT>& stream_end, | 
|---|
| 165 |         parse_match_result_type& result, | 
|---|
| 166 |         unsigned int& level)  const | 
|---|
| 167 |   { | 
|---|
| 168 |  | 
|---|
| 169 |     level++; | 
|---|
| 170 |     charT c; | 
|---|
| 171 |     // if we conditionally advance sitr, we won't have  | 
|---|
| 172 |     // to consume the next character past the input | 
|---|
| 173 |     bool adv_itr = true; | 
|---|
| 174 |     if (level > result.cache.size()) { | 
|---|
| 175 |       if (sitr == stream_end) return 0; //bail - input exhausted | 
|---|
| 176 |       c = static_cast<charT>(std::tolower(*sitr)); | 
|---|
| 177 |       //result.cache += c; | 
|---|
| 178 |       //sitr++; | 
|---|
| 179 |     } | 
|---|
| 180 |     else { | 
|---|
| 181 |       // if we're looking for characters from the cache,  | 
|---|
| 182 |       // we don't want to increment sitr | 
|---|
| 183 |       adv_itr = false;  | 
|---|
| 184 |       c = static_cast<charT>(std::tolower(result.cache[level-1])); | 
|---|
| 185 |     } | 
|---|
| 186 |     const_iterator litr = m_next_chars.lower_bound(c); | 
|---|
| 187 |     const_iterator uitr = m_next_chars.upper_bound(c); | 
|---|
| 188 |     while (litr != uitr) { // equal if not found | 
|---|
| 189 |       if(adv_itr) { | 
|---|
| 190 |         sitr++;  | 
|---|
| 191 |         result.cache += c; | 
|---|
| 192 |       } | 
|---|
| 193 |       if (litr->second.m_value != -1) { // -1 is default value | 
|---|
| 194 |         if (result.match_depth < level) { | 
|---|
| 195 |           result.current_match = litr->second.m_value; | 
|---|
| 196 |           result.match_depth = static_cast<unsigned short>(level); | 
|---|
| 197 |         } | 
|---|
| 198 |         litr->second.match(sitr, stream_end,  | 
|---|
| 199 |                            result, level); | 
|---|
| 200 |         level--; | 
|---|
| 201 |       } | 
|---|
| 202 |       else { | 
|---|
| 203 |         litr->second.match(sitr, stream_end,  | 
|---|
| 204 |                            result, level); | 
|---|
| 205 |         level--; | 
|---|
| 206 |       } | 
|---|
| 207 |      | 
|---|
| 208 |       if(level <= result.cache.size()) { | 
|---|
| 209 |         adv_itr = false; | 
|---|
| 210 |       } | 
|---|
| 211 |  | 
|---|
| 212 |       litr++; | 
|---|
| 213 |     } | 
|---|
| 214 |     return result.current_match; | 
|---|
| 215 |  | 
|---|
| 216 |   } | 
|---|
| 217 |  | 
|---|
| 218 |   /*! Must check match_results::has_remaining() after match() is  | 
|---|
| 219 |    * called. This is required so the user can determine if  | 
|---|
| 220 |    * stream iterator is already pointing to the expected  | 
|---|
| 221 |    * character or not (match() might advance sitr to next char in stream). | 
|---|
| 222 |    */ | 
|---|
| 223 |   parse_match_result_type | 
|---|
| 224 |   match(std::istreambuf_iterator<charT>& sitr,  | 
|---|
| 225 |         std::istreambuf_iterator<charT>& stream_end) const | 
|---|
| 226 |   { | 
|---|
| 227 |     // lookup to_lower of char in tree. | 
|---|
| 228 |     unsigned int level = 0; | 
|---|
| 229 |     //    string_type cache; | 
|---|
| 230 |     parse_match_result_type result; | 
|---|
| 231 |     match(sitr, stream_end, result, level); | 
|---|
| 232 |     return result; | 
|---|
| 233 |   } | 
|---|
| 234 |  | 
|---|
| 235 |   void printme(std::ostream& os, int& level) | 
|---|
| 236 |   { | 
|---|
| 237 |     level++; | 
|---|
| 238 |     iterator itr = m_next_chars.begin(); | 
|---|
| 239 |     iterator end = m_next_chars.end(); | 
|---|
| 240 |     //    os << "starting level: " << level << std::endl; | 
|---|
| 241 |     while (itr != end) { | 
|---|
| 242 |       os << "level:  " << level  | 
|---|
| 243 |          << " node:  " << itr->first  | 
|---|
| 244 |          << " value: " << itr->second.m_value | 
|---|
| 245 |          << std::endl; | 
|---|
| 246 |       itr->second.printme(os, level); | 
|---|
| 247 |       itr++; | 
|---|
| 248 |     } | 
|---|
| 249 |     level--; | 
|---|
| 250 |   } | 
|---|
| 251 |  | 
|---|
| 252 |   void print(std::ostream& os) | 
|---|
| 253 |   { | 
|---|
| 254 |     int level = 0; | 
|---|
| 255 |     printme(os, level); | 
|---|
| 256 |   } | 
|---|
| 257 |      | 
|---|
| 258 |   void printmatch(std::ostream& os, charT c) | 
|---|
| 259 |   { | 
|---|
| 260 |     iterator litr = m_next_chars.lower_bound(c); | 
|---|
| 261 |     iterator uitr = m_next_chars.upper_bound(c); | 
|---|
| 262 |     os << "matches for: " << c << std::endl; | 
|---|
| 263 |     while (litr != uitr) { | 
|---|
| 264 |       os << " node:  " << litr->first  | 
|---|
| 265 |          << " value: " << litr->second.m_value | 
|---|
| 266 |          << std::endl; | 
|---|
| 267 |       litr++; | 
|---|
| 268 |     } | 
|---|
| 269 |   } | 
|---|
| 270 |  | 
|---|
| 271 | }; | 
|---|
| 272 |  | 
|---|
| 273 |  | 
|---|
| 274 | } } //namespace | 
|---|
| 275 | #endif | 
|---|