summaryrefslogtreecommitdiffstats
blob: a6d8ea9e0a63d8e6cd40367dfb9803cd17a96d10 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
#ifndef DATE_TIME_TZ_DB_BASE_HPP__
#define DATE_TIME_TZ_DB_BASE_HPP__

/* Copyright (c) 2003-2005 CrystalClear Software, Inc.
 * Subject to the Boost Software License, Version 1.0. 
 * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 * Author: Jeff Garland, Bart Garst
 * $Date: 2012-09-22 09:04:10 -0700 (Sat, 22 Sep 2012) $
 */

#include <map>
#include <vector>
#include <string>
#include <sstream>
#include <fstream>
#include <stdexcept>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/throw_exception.hpp>
#include <boost/date_time/compiler_config.hpp>
#include <boost/date_time/time_zone_names.hpp>
#include <boost/date_time/time_zone_base.hpp>
#include <boost/date_time/time_parsing.hpp>

namespace boost {
  namespace date_time {

    //! Exception thrown when tz database cannot locate requested data file
    class data_not_accessible : public std::logic_error
    {
     public:
       data_not_accessible() : 
         std::logic_error(std::string("Unable to locate or access the required datafile.")) 
       {}
       data_not_accessible(const std::string& filespec) : 
         std::logic_error(std::string("Unable to locate or access the required datafile. Filespec: " + filespec)) 
       {}
    };
    
    //! Exception thrown when tz database locates incorrect field structure in data file
    class bad_field_count : public std::out_of_range
    {
     public:
       bad_field_count(const std::string& s) : 
         std::out_of_range(s) 
      {}
    };

    //! Creates a database of time_zones from csv datafile
    /*! The csv file containing the zone_specs used by the
     * tz_db_base is intended to be customized by the
     * library user. When customizing this file (or creating your own) the
     * file must follow a specific format.
     * 
     * This first line is expected to contain column headings and is therefore
     * not processed by the tz_db_base.
     *
     * Each record (line) must have eleven fields. Some of those fields can
     * be empty. Every field (even empty ones) must be enclosed in 
     * double-quotes.
     * Ex:
     * @code
     * "America/Phoenix" <- string enclosed in quotes
     * ""                <- empty field
     * @endcode
     * 
     * Some fields represent a length of time. The format of these fields 
     * must be:
     * @code
     * "{+|-}hh:mm[:ss]" <- length-of-time format
     * @endcode
     * Where the plus or minus is mandatory and the seconds are optional.
     * 
     * Since some time zones do not use daylight savings it is not always 
     * necessary for every field in a zone_spec to contain a value. All 
     * zone_specs must have at least ID and GMT offset. Zones that use 
     * daylight savings must have all fields filled except: 
     * STD ABBR, STD NAME, DST NAME. You should take note 
     * that DST ABBR is mandatory for zones that use daylight savings 
     * (see field descriptions for further details).
     *
     * ******* Fields and their description/details ********* 
     *     
     * ID: 
     * Contains the identifying string for the zone_spec. Any string will
     * do as long as it's unique. No two ID's can be the same. 
     *
     * STD ABBR:
     * STD NAME:
     * DST ABBR:
     * DST NAME:
     * These four are all the names and abbreviations used by the time 
     * zone being described. While any string will do in these fields, 
     * care should be taken. These fields hold the strings that will be 
     * used in the output of many of the local_time classes. 
     * Ex:
     * @code
     * time_zone nyc = tz_db.time_zone_from_region("America/New_York");
     * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc);
     * cout << ny_time.to_long_string() << endl;
     * // 2004-Aug-30 00:00:00 Eastern Daylight Time
     * cout << ny_time.to_short_string() << endl;
     * // 2004-Aug-30 00:00:00 EDT
     * @endcode
     *
     * NOTE: The exact format/function names may vary - see local_time 
     * documentation for further details.
     *
     * GMT offset:
     * This is the number of hours added to utc to get the local time 
     * before any daylight savings adjustments are made. Some examples 
     * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours.
     * The format must follow the length-of-time format described above.
     *
     * DST adjustment:
     * The amount of time added to gmt_offset when daylight savings is in 
     * effect. The format must follow the length-of-time format described
     * above.
     *
     * DST Start Date rule:
     * This is a specially formatted string that describes the day of year
     * in which the transition take place. It holds three fields of it's own,
     * separated by semicolons. 
     * The first field indicates the "nth" weekday of the month. The possible 
     * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth), 
     * and -1 (last).
     * The second field indicates the day-of-week from 0-6 (Sun=0).
     * The third field indicates the month from 1-12 (Jan=1).
     * 
     * Examples are: "-1;5;9"="Last Friday of September", 
     * "2;1;3"="Second Monday of March"
     *
     * Start time:
     * Start time is the number of hours past midnight, on the day of the
     * start transition, the transition takes place. More simply put, the 
     * time of day the transition is made (in 24 hours format). The format
     * must follow the length-of-time format described above with the 
     * exception that it must always be positive.
     *
     * DST End date rule:
     * See DST Start date rule. The difference here is this is the day 
     * daylight savings ends (transition to STD).
     *
     * End time:
     * Same as Start time.
     */
    template<class time_zone_type, class rule_type>
    class tz_db_base {
    public:
      /* Having CharT as a template parameter created problems 
       * with posix_time::duration_from_string. Templatizing 
       * duration_from_string was not possible at this time, however, 
       * it should be possible in the future (when poor compilers get 
       * fixed or stop being used). 
       * Since this class was designed to use CharT as a parameter it 
       * is simply typedef'd here to ease converting in back to a 
       * parameter the future */
      typedef char char_type;

      typedef typename time_zone_type::base_type time_zone_base_type;
      typedef typename time_zone_type::time_duration_type time_duration_type;
      typedef time_zone_names_base<char_type> time_zone_names;
      typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets;
      typedef std::basic_string<char_type> string_type;

      //! Constructs an empty database
      tz_db_base() {}

      //! Process csv data file, may throw exceptions
      /*! May throw bad_field_count exceptions */
      void load_from_stream(std::istream &in)
      {
        std::string  buff;
        while( std::getline(in, buff)) {
          parse_string(buff);
        }
      }

      //! Process csv data file, may throw exceptions
      /*! May throw data_not_accessible, or bad_field_count exceptions */
      void load_from_file(const std::string& pathspec)
      {
        string_type in_str;
        std::string  buff;
        
        std::ifstream ifs(pathspec.c_str());
        if(!ifs){
          boost::throw_exception(data_not_accessible(pathspec));
        }
        std::getline(ifs, buff); // first line is column headings
        this->load_from_stream(ifs);
      }

      //! returns true if record successfully added to map
      /*! Takes a region name in the form of "America/Phoenix", and a 
       * time_zone object for that region. The id string must be a unique 
       * name that does not already exist in the database. */
      bool add_record(const string_type& region, 
                      boost::shared_ptr<time_zone_base_type> tz)
      {
        typename map_type::value_type p(region, tz); 
        return (m_zone_map.insert(p)).second;
      }

      //! Returns a time_zone object built from the specs for the given region
      /*! Returns a time_zone object built from the specs for the given 
       * region. If region does not exist a local_time::record_not_found 
       * exception will be thrown */
      boost::shared_ptr<time_zone_base_type> 
      time_zone_from_region(const string_type& region) const 
      {
        // get the record
        typename map_type::const_iterator record = m_zone_map.find(region);
        if(record == m_zone_map.end()){
          return boost::shared_ptr<time_zone_base_type>(); //null pointer
        }
        return record->second;
      }

      //! Returns a vector of strings holding the time zone regions in the database
      std::vector<std::string> region_list() const
      {
        typedef std::vector<std::string> vector_type;
        vector_type regions;
        typename map_type::const_iterator itr = m_zone_map.begin();
        while(itr != m_zone_map.end()) {
          regions.push_back(itr->first);
          ++itr;
        }
        return regions;
      }
    
    private:
      typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type;
      map_type m_zone_map;

      // start and end rule are of the same type
      typedef typename rule_type::start_rule::week_num week_num;

      /* TODO: mechanisms need to be put in place to handle different
       * types of rule specs. parse_rules() only handles nth_kday
       * rule types. */
      
      //! parses rule specs for transition day rules
      rule_type* parse_rules(const string_type& sr, const string_type& er) const
      {
        using namespace gregorian;
        // start and end rule are of the same type, 
        // both are included here for readability
        typedef typename rule_type::start_rule start_rule;
        typedef typename rule_type::end_rule end_rule;
       
        // these are: [start|end] nth, day, month
        int s_nth = 0, s_d = 0, s_m = 0;
        int e_nth = 0, e_d = 0, e_m = 0;
        split_rule_spec(s_nth, s_d, s_m, sr);
        split_rule_spec(e_nth, e_d, e_m, er);
        
        typename start_rule::week_num s_wn, e_wn;
        s_wn = get_week_num(s_nth);
        e_wn = get_week_num(e_nth);
        
        
        return new rule_type(start_rule(s_wn, s_d, s_m),
                             end_rule(e_wn, e_d, e_m));
      }
      //! helper function for parse_rules()
      week_num get_week_num(int nth) const
      {
        typedef typename rule_type::start_rule start_rule;
        switch(nth){
        case 1:
          return start_rule::first;
        case 2:
          return start_rule::second;
        case 3:
          return start_rule::third;
        case 4:
          return start_rule::fourth;
        case 5:
        case -1:
          return start_rule::fifth;
        default:
          // shouldn't get here - add error handling later
          break;
        }
        return start_rule::fifth; // silence warnings
      }
          
      //! splits the [start|end]_date_rule string into 3 ints
      void split_rule_spec(int& nth, int& d, int& m, string_type rule) const
      {
        typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type;
        typedef boost::tokenizer<char_separator_type,
                                 std::basic_string<char_type>::const_iterator,
                                 std::basic_string<char_type> > tokenizer;
        typedef boost::tokenizer<char_separator_type,
                                 std::basic_string<char_type>::const_iterator,
                                 std::basic_string<char_type> >::iterator tokenizer_iterator;
        
        const char_type sep_char[] = { ';', '\0'};
        char_separator_type sep(sep_char);
        tokenizer tokens(rule, sep); // 3 fields
        
        tokenizer_iterator tok_iter = tokens.begin(); 
        nth = std::atoi(tok_iter->c_str()); ++tok_iter;
        d   = std::atoi(tok_iter->c_str()); ++tok_iter;
        m   = std::atoi(tok_iter->c_str());
      }

     
      //! Take a line from the csv, turn it into a time_zone_type.
      /*! Take a line from the csv, turn it into a time_zone_type,
       * and add it to the map. Zone_specs in csv file are expected to 
       * have eleven fields that describe the time zone. Returns true if 
       * zone_spec successfully added to database */
      bool parse_string(string_type& s)
      {
        std::vector<string_type> result;
        typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type;

        token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>());

        token_iter_type end;
        while (i != end) {
          result.push_back(*i);
          i++;
        }

        enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET,
                         DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE,
                         END_TIME, FIELD_COUNT };

        //take a shot at fixing gcc 4.x error
        const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT);
        if (result.size() != expected_fields) { 
          std::ostringstream msg;
          msg << "Expecting " << FIELD_COUNT << " fields, got " 
            << result.size() << " fields in line: " << s;
          boost::throw_exception(bad_field_count(msg.str()));
          BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach
        }

        // initializations
        bool has_dst = true; 
        if(result[DSTABBR] == std::string()){
          has_dst = false;
        }


        // start building components of a time_zone
        time_zone_names names(result[STDNAME], result[STDABBR],
                              result[DSTNAME], result[DSTABBR]);

        time_duration_type utc_offset = 
          str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]);
        
        dst_adjustment_offsets adjust(time_duration_type(0,0,0),
                                      time_duration_type(0,0,0),
                                      time_duration_type(0,0,0));

        boost::shared_ptr<rule_type> rules;

        if(has_dst){
          adjust = dst_adjustment_offsets(
                                          str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]),
                                          str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]),
                                          str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME])
                                          );

          rules = 
            boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE],
                                                     result[END_DATE_RULE]));
        }
        string_type id(result[ID]);
        boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules));
        return (add_record(id, zone));
        
      } 
     
    };

} } // namespace

#endif // DATE_TIME_TZ_DB_BASE_HPP__