1 | #ifndef DATE_TIME_TZ_DB_BASE_HPP__ |
---|
2 | #define DATE_TIME_TZ_DB_BASE_HPP__ |
---|
3 | |
---|
4 | /* Copyright (c) 2003-2004 CrystalClear Software, Inc. |
---|
5 | * Subject to the Boost Software License, Version 1.0. |
---|
6 | * (See accompanying file LICENSE-1.0 or http://www.boost.org/LICENSE-1.0) |
---|
7 | * Author: Jeff Garland, Bart Garst |
---|
8 | * $Date: 2005/05/07 08:49:15 $ |
---|
9 | */ |
---|
10 | |
---|
11 | #include "boost/shared_ptr.hpp" |
---|
12 | #include "boost/date_time/time_zone_names.hpp" |
---|
13 | #include "boost/date_time/time_zone_base.hpp" |
---|
14 | #include "boost/date_time/posix_time/posix_time.hpp" |
---|
15 | #include "boost/tokenizer.hpp" |
---|
16 | #include <string> |
---|
17 | #include <sstream> |
---|
18 | #include <map> |
---|
19 | #include <vector> |
---|
20 | #include <stdexcept> |
---|
21 | #include <fstream> |
---|
22 | |
---|
23 | namespace boost { |
---|
24 | namespace date_time { |
---|
25 | |
---|
26 | |
---|
//! Exception reporting that the time zone data file could not be opened
/*! Derives from std::logic_error. The what() text is a fixed message,
 * optionally followed by the file specification that was tried. */
struct data_not_accessible : public std::logic_error
{
  //! Builds the exception with the generic message only
  data_not_accessible()
    : std::logic_error(std::string("Unable to locate or access the required datafile."))
  {}

  //! Builds the exception with the generic message plus the offending filespec
  data_not_accessible(const std::string& filespec)
    : std::logic_error(std::string("Unable to locate or access the required datafile. Filespec: ").append(filespec))
  {}
};
---|
//! Exception reporting that a record did not contain the expected number of fields
/*! Derives from std::out_of_range; the caller supplies the complete
 * what() message. */
struct bad_field_count : public std::out_of_range
{
  //! \param message text returned verbatim by what()
  bad_field_count(const std::string& message)
    : std::out_of_range(message)
  {}
};
---|
36 | |
---|
37 | //! Creates a database of time_zones from csv datafile |
---|
38 | /*! The csv file containing the zone_specs used by the |
---|
39 | * tz_db_base is intended to be customized by the |
---|
40 | * library user. When customizing this file (or creating your own) the |
---|
41 | * file must follow a specific format. |
---|
42 | * |
---|
43 | * The first line is expected to contain column headings and is therefore |
---|
44 | * not processed by the tz_db_base. |
---|
45 | * |
---|
46 | * Each record (line) must have eleven fields. Some of those fields can |
---|
47 | * be empty. Every field (even empty ones) must be enclosed in |
---|
48 | * double-quotes. |
---|
49 | * Ex: |
---|
50 | * @code |
---|
51 | * "America/Phoenix" <- string enclosed in quotes |
---|
52 | * "" <- empty field |
---|
53 | * @endcode |
---|
54 | * |
---|
55 | * Some fields represent a length of time. The format of these fields |
---|
56 | * must be: |
---|
57 | * @code |
---|
58 | * "{+|-}hh:mm[:ss]" <- length-of-time format |
---|
59 | * @endcode |
---|
60 | * Where the plus or minus is mandatory and the seconds are optional. |
---|
61 | * |
---|
62 | * Since some time zones do not use daylight savings it is not always |
---|
63 | * necessary for every field in a zone_spec to contain a value. All |
---|
64 | * zone_specs must have at least ID and GMT offset. Zones that use |
---|
65 | * daylight savings must have all fields filled except: |
---|
66 | * STD ABBR, STD NAME, DST NAME. You should take note |
---|
67 | * that DST ABBR is mandatory for zones that use daylight savings |
---|
68 | * (see field descriptions for further details). |
---|
69 | * |
---|
70 | * ******* Fields and their description/details ********* |
---|
71 | * |
---|
72 | * ID: |
---|
73 | * Contains the identifying string for the zone_spec. Any string will |
---|
74 | * do as long as it's unique. No two IDs can be the same. |
---|
75 | * |
---|
76 | * STD ABBR: |
---|
77 | * STD NAME: |
---|
78 | * DST ABBR: |
---|
79 | * DST NAME: |
---|
80 | * These four are all the names and abbreviations used by the time |
---|
81 | * zone being described. While any string will do in these fields, |
---|
82 | * care should be taken. These fields hold the strings that will be |
---|
83 | * used in the output of many of the local_time classes. |
---|
84 | * Ex: |
---|
85 | * @code |
---|
86 | * time_zone nyc = tz_db.time_zone_from_region("America/New_York"); |
---|
87 | * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc); |
---|
88 | * cout << ny_time.to_long_string() << endl; |
---|
89 | * // 2004-Aug-30 00:00:00 Eastern Daylight Time |
---|
90 | * cout << ny_time.to_short_string() << endl; |
---|
91 | * // 2004-Aug-30 00:00:00 EDT |
---|
92 | * @endcode |
---|
93 | * |
---|
94 | * NOTE: The exact format/function names may vary - see local_time |
---|
95 | * documentation for further details. |
---|
96 | * |
---|
97 | * GMT offset: |
---|
98 | * This is the number of hours added to utc to get the local time |
---|
99 | * before any daylight savings adjustments are made. Some examples |
---|
100 | * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours. |
---|
101 | * The format must follow the length-of-time format described above. |
---|
102 | * |
---|
103 | * DST adjustment: |
---|
104 | * The amount of time added to gmt_offset when daylight savings is in |
---|
105 | * effect. The format must follow the length-of-time format described |
---|
106 | * above. |
---|
107 | * |
---|
108 | * DST Start Date rule: |
---|
109 | * This is a specially formatted string that describes the day of year |
---|
110 | * in which the transition takes place. It holds three fields of its own, |
---|
111 | * separated by semicolons. |
---|
112 | * The first field indicates the "nth" weekday of the month. The possible |
---|
113 | * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth), |
---|
114 | * and -1 (last). |
---|
115 | * The second field indicates the day-of-week from 0-6 (Sun=0). |
---|
116 | * The third field indicates the month from 1-12 (Jan=1). |
---|
117 | * |
---|
118 | * Examples are: "-1;5;9"="Last Friday of September", |
---|
119 | * "2;1;3"="Second Monday of March" |
---|
120 | * |
---|
121 | * Start time: |
---|
122 | * Start time is the number of hours past midnight, on the day of the |
---|
123 | * start transition, the transition takes place. More simply put, the |
---|
124 | * time of day the transition is made (in 24 hours format). The format |
---|
125 | * must follow the length-of-time format described above with the |
---|
126 | * exception that it must always be positive. |
---|
127 | * |
---|
128 | * DST End date rule: |
---|
129 | * See DST Start date rule. The difference here is this is the day |
---|
130 | * daylight savings ends (transition to STD). |
---|
131 | * |
---|
132 | * End time: |
---|
133 | * Same as Start time. |
---|
134 | */ |
---|
135 | template<class time_zone_type, class rule_type> |
---|
136 | class tz_db_base { |
---|
137 | public: |
---|
138 | /* Having charT as a template parameter created problems |
---|
139 | * with posix_time::duration_from_string. Templatizing |
---|
140 | * duration_from_string was not possible at this time, however, |
---|
141 | * it should be possible in the future (when poor compilers get |
---|
142 | * fixed or stop being used). |
---|
143 | * Since this class was designed to use charT as a parameter it |
---|
144 | * is simply typedef'd here to ease converting in back to a |
---|
145 | * parameter the future */ |
---|
146 | typedef char charT; |
---|
147 | |
---|
148 | typedef typename time_zone_type::base_type time_zone_base_type; |
---|
149 | typedef typename time_zone_type::time_duration_type time_duration_type; |
---|
150 | typedef time_zone_names_base<charT> time_zone_names; |
---|
151 | typedef dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets; |
---|
152 | typedef std::basic_string<charT> string_type; |
---|
153 | |
---|
154 | //! Constructs an empty database |
---|
155 | tz_db_base() {} |
---|
156 | |
---|
157 | //! Process csv data file, may throw exceptions |
---|
158 | /*! May throw data_not_accessible, or bad_field_count exceptions */ |
---|
159 | void load_from_file(const std::string& pathspec) |
---|
160 | { |
---|
161 | string_type in_str; |
---|
162 | std::string buff; |
---|
163 | |
---|
164 | std::ifstream ifs(pathspec.c_str()); |
---|
165 | if(!ifs){ |
---|
166 | throw data_not_accessible(pathspec); |
---|
167 | } |
---|
168 | std::getline(ifs, buff); // first line is column headings |
---|
169 | |
---|
170 | while( std::getline(ifs, buff)) { |
---|
171 | parse_string(buff); |
---|
172 | } |
---|
173 | } |
---|
174 | |
---|
175 | //! returns true if record successfully added to map |
---|
176 | /*! Takes an id string in the form of "America/Phoenix", and a |
---|
177 | * time_zone object for that region. The id string must be a unique |
---|
178 | * name that does not already exist in the database. */ |
---|
179 | bool add_record(const string_type& id, |
---|
180 | boost::shared_ptr<time_zone_base_type> tz) |
---|
181 | { |
---|
182 | typename map_type::value_type p(id, tz); |
---|
183 | return (m_zone_map.insert(p)).second; |
---|
184 | } |
---|
185 | |
---|
186 | //! Returns a time_zone object built from the specs for the given region |
---|
187 | /*! Returns a time_zone object built from the specs for the given |
---|
188 | * region. If region does not exist a local_time::record_not_found |
---|
189 | * exception will be thrown */ |
---|
190 | boost::shared_ptr<time_zone_base_type> |
---|
191 | time_zone_from_region(const string_type& region) const |
---|
192 | { |
---|
193 | // get the record |
---|
194 | typename map_type::const_iterator record = m_zone_map.find(region); |
---|
195 | if(record == m_zone_map.end()){ |
---|
196 | return boost::shared_ptr<time_zone_base_type>(); //null pointer |
---|
197 | } |
---|
198 | return record->second; |
---|
199 | } |
---|
200 | |
---|
201 | //! Returns a vector of strings holding the time zone regions in the database |
---|
202 | std::vector<std::string> region_list() const |
---|
203 | { |
---|
204 | typedef std::vector<std::string> vector_type; |
---|
205 | vector_type regions; |
---|
206 | typename map_type::const_iterator itr = m_zone_map.begin(); |
---|
207 | while(itr != m_zone_map.end()) { |
---|
208 | regions.push_back(itr->first); |
---|
209 | ++itr; |
---|
210 | } |
---|
211 | return regions; |
---|
212 | } |
---|
213 | |
---|
214 | private: |
---|
215 | typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type; |
---|
216 | map_type m_zone_map; |
---|
217 | |
---|
218 | // start and end rule are of the same type |
---|
219 | typedef typename rule_type::start_rule::week_num week_num; |
---|
220 | |
---|
221 | /* TODO: mechanisms need to be put in place to handle different |
---|
222 | * types of rule specs. parse_rules() only handles nth_kday |
---|
223 | * rule types. */ |
---|
224 | |
---|
225 | //! parses rule specs for transition day rules |
---|
226 | rule_type* parse_rules(const string_type& sr, const string_type& er) const |
---|
227 | { |
---|
228 | using namespace gregorian; |
---|
229 | // start and end rule are of the same type, |
---|
230 | // both are included here for readability |
---|
231 | typedef typename rule_type::start_rule start_rule; |
---|
232 | typedef typename rule_type::end_rule end_rule; |
---|
233 | |
---|
234 | // these are: [start|end] nth, day, month |
---|
235 | int s_nth = 0, s_d = 0, s_m = 0; |
---|
236 | int e_nth = 0, e_d = 0, e_m = 0; |
---|
237 | split_rule_spec(s_nth, s_d, s_m, sr); |
---|
238 | split_rule_spec(e_nth, e_d, e_m, er); |
---|
239 | |
---|
240 | typename start_rule::week_num s_wn, e_wn; |
---|
241 | s_wn = get_week_num(s_nth); |
---|
242 | e_wn = get_week_num(e_nth); |
---|
243 | |
---|
244 | |
---|
245 | return new rule_type(start_rule(s_wn, s_d, s_m), |
---|
246 | end_rule(e_wn, e_d, e_m)); |
---|
247 | } |
---|
248 | //! helper function for parse_rules() |
---|
249 | week_num get_week_num(int nth) const |
---|
250 | { |
---|
251 | typedef typename rule_type::start_rule start_rule; |
---|
252 | switch(nth){ |
---|
253 | case 1: |
---|
254 | return start_rule::first; |
---|
255 | case 2: |
---|
256 | return start_rule::second; |
---|
257 | case 3: |
---|
258 | return start_rule::third; |
---|
259 | case 4: |
---|
260 | return start_rule::fourth; |
---|
261 | case 5: |
---|
262 | case -1: |
---|
263 | return start_rule::fifth; |
---|
264 | default: |
---|
265 | // shouldn't get here - add error handling later |
---|
266 | break; |
---|
267 | } |
---|
268 | return start_rule::fifth; // silence warnings |
---|
269 | } |
---|
270 | |
---|
271 | //! splits the [start|end]_date_rule string into 3 ints |
---|
272 | void split_rule_spec(int& nth, int& d, int& m, string_type rule) const |
---|
273 | { |
---|
274 | typedef boost::tokenizer<boost::char_separator<charT>, |
---|
275 | string_type::const_iterator, |
---|
276 | string_type > tokenizer; |
---|
277 | const charT sep_char[] = { ';', '\0'}; |
---|
278 | boost::char_separator<charT> sep(sep_char); |
---|
279 | tokenizer tokens(rule, sep); // 3 fields |
---|
280 | |
---|
281 | typename tokenizer::iterator tok_iter = tokens.begin(); |
---|
282 | nth = std::atoi(tok_iter->c_str()); ++tok_iter; |
---|
283 | d = std::atoi(tok_iter->c_str()); ++tok_iter; |
---|
284 | m = std::atoi(tok_iter->c_str()); |
---|
285 | } |
---|
286 | |
---|
287 | |
---|
288 | //! Take a line from the csv, turn it into a time_zone_type. |
---|
289 | /*! Take a line from the csv, turn it into a time_zone_type, |
---|
290 | * and add it to the map. Zone_specs in csv file are expected to |
---|
291 | * have eleven fields that describe the time zone. Returns true if |
---|
292 | * zone_spec successfully added to database */ |
---|
293 | bool parse_string(string_type& s) |
---|
294 | { |
---|
295 | |
---|
296 | std::vector<string_type> result; |
---|
297 | typedef boost::token_iterator_generator<boost::escaped_list_separator<charT>, string_type::const_iterator, string_type >::type token_iter_type; |
---|
298 | |
---|
299 | token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<charT>()); |
---|
300 | |
---|
301 | token_iter_type end; |
---|
302 | while (i != end) { |
---|
303 | result.push_back(*i); |
---|
304 | i++; |
---|
305 | } |
---|
306 | |
---|
307 | enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET, |
---|
308 | DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE, |
---|
309 | END_TIME, FIELD_COUNT }; |
---|
310 | |
---|
311 | //take a shot at fixing gcc 4.x error |
---|
312 | const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT); |
---|
313 | if (result.size() != expected_fields) { |
---|
314 | std::stringstream msg; |
---|
315 | msg << "Expecting " << FIELD_COUNT << " fields, got " |
---|
316 | << result.size() << " fields in line: " << s; |
---|
317 | throw bad_field_count(msg.str()); |
---|
318 | } |
---|
319 | |
---|
320 | // initializations |
---|
321 | bool has_dst = true; |
---|
322 | if(result[DSTABBR] == std::string()){ |
---|
323 | has_dst = false; |
---|
324 | } |
---|
325 | |
---|
326 | |
---|
327 | // start building components of a time_zone |
---|
328 | time_zone_names names(result[STDNAME], result[STDABBR], |
---|
329 | result[DSTNAME], result[DSTABBR]); |
---|
330 | |
---|
331 | time_duration_type utc_offset = |
---|
332 | posix_time::duration_from_string(result[GMTOFFSET]); |
---|
333 | |
---|
334 | dst_adjustment_offsets adjust(time_duration_type(0,0,0), |
---|
335 | time_duration_type(0,0,0), |
---|
336 | time_duration_type(0,0,0)); |
---|
337 | |
---|
338 | boost::shared_ptr<rule_type> rules; |
---|
339 | |
---|
340 | if(has_dst){ |
---|
341 | adjust = dst_adjustment_offsets( |
---|
342 | posix_time::duration_from_string(result[DSTADJUST]), |
---|
343 | posix_time::duration_from_string(result[START_TIME]), |
---|
344 | posix_time::duration_from_string(result[END_TIME]) |
---|
345 | ); |
---|
346 | |
---|
347 | rules = |
---|
348 | boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE], |
---|
349 | result[END_DATE_RULE])); |
---|
350 | } |
---|
351 | string_type id(result[ID]); |
---|
352 | boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules)); |
---|
353 | return (add_record(id, zone)); |
---|
354 | |
---|
355 | } |
---|
356 | |
---|
357 | }; |
---|
358 | |
---|
359 | } } // namespace |
---|
360 | |
---|
361 | #endif // DATE_TIME_TZ_DB_BASE_HPP__ |
---|