#ifndef DATE_TIME_TZ_DB_BASE_HPP__ #define DATE_TIME_TZ_DB_BASE_HPP__ /* Copyright (c) 2003-2005 CrystalClear Software, Inc. * Subject to the Boost Software License, Version 1.0. * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) * Author: Jeff Garland, Bart Garst * $Date: 2008-11-12 14:37:53 -0500 (Wed, 12 Nov 2008) $ */ #include <map> #include <vector> #include <string> #include <sstream> #include <fstream> #include <stdexcept> #include <boost/tokenizer.hpp> #include <boost/shared_ptr.hpp> #include <boost/throw_exception.hpp> #include <boost/date_time/compiler_config.hpp> #include <boost/date_time/time_zone_names.hpp> #include <boost/date_time/time_zone_base.hpp> #include <boost/date_time/time_parsing.hpp> namespace boost { namespace date_time { //! Exception thrown when tz database cannot locate requested data file class data_not_accessible : public std::logic_error { public: data_not_accessible() : std::logic_error(std::string("Unable to locate or access the required datafile.")) {} data_not_accessible(const std::string& filespec) : std::logic_error(std::string("Unable to locate or access the required datafile. Filespec: " + filespec)) {} }; //! Exception thrown when tz database locates incorrect field structure in data file class bad_field_count : public std::out_of_range { public: bad_field_count(const std::string& s) : std::out_of_range(s) {} }; //! Creates a database of time_zones from csv datafile /*! The csv file containing the zone_specs used by the * tz_db_base is intended to be customized by the * library user. When customizing this file (or creating your own) the * file must follow a specific format. * * This first line is expected to contain column headings and is therefore * not processed by the tz_db_base. * * Each record (line) must have eleven fields. Some of those fields can * be empty. Every field (even empty ones) must be enclosed in * double-quotes. * Ex: * @code * "America/Phoenix" <- string enclosed in quotes * "" <- empty field * @endcode * * Some fields represent a length of time. The format of these fields * must be: * @code * "{+|-}hh:mm[:ss]" <- length-of-time format * @endcode * Where the plus or minus is mandatory and the seconds are optional. * * Since some time zones do not use daylight savings it is not always * necessary for every field in a zone_spec to contain a value. All * zone_specs must have at least ID and GMT offset. Zones that use * daylight savings must have all fields filled except: * STD ABBR, STD NAME, DST NAME. You should take note * that DST ABBR is mandatory for zones that use daylight savings * (see field descriptions for further details). * * ******* Fields and their description/details ********* * * ID: * Contains the identifying string for the zone_spec. Any string will * do as long as it's unique. No two ID's can be the same. * * STD ABBR: * STD NAME: * DST ABBR: * DST NAME: * These four are all the names and abbreviations used by the time * zone being described. While any string will do in these fields, * care should be taken. These fields hold the strings that will be * used in the output of many of the local_time classes. * Ex: * @code * time_zone nyc = tz_db.time_zone_from_region("America/New_York"); * local_time ny_time(date(2004, Aug, 30), IS_DST, nyc); * cout << ny_time.to_long_string() << endl; * // 2004-Aug-30 00:00:00 Eastern Daylight Time * cout << ny_time.to_short_string() << endl; * // 2004-Aug-30 00:00:00 EDT * @endcode * * NOTE: The exact format/function names may vary - see local_time * documentation for further details. * * GMT offset: * This is the number of hours added to utc to get the local time * before any daylight savings adjustments are made. Some examples * are: America/New_York offset -5 hours, & Africa/Cairo offset +2 hours. * The format must follow the length-of-time format described above. * * DST adjustment: * The amount of time added to gmt_offset when daylight savings is in * effect. The format must follow the length-of-time format described * above. * * DST Start Date rule: * This is a specially formatted string that describes the day of year * in which the transition take place. It holds three fields of it's own, * separated by semicolons. * The first field indicates the "nth" weekday of the month. The possible * values are: 1 (first), 2 (second), 3 (third), 4 (fourth), 5 (fifth), * and -1 (last). * The second field indicates the day-of-week from 0-6 (Sun=0). * The third field indicates the month from 1-12 (Jan=1). * * Examples are: "-1;5;9"="Last Friday of September", * "2;1;3"="Second Monday of March" * * Start time: * Start time is the number of hours past midnight, on the day of the * start transition, the transition takes place. More simply put, the * time of day the transition is made (in 24 hours format). The format * must follow the length-of-time format described above with the * exception that it must always be positive. * * DST End date rule: * See DST Start date rule. The difference here is this is the day * daylight savings ends (transition to STD). * * End time: * Same as Start time. */ template<class time_zone_type, class rule_type> class tz_db_base { public: /* Having CharT as a template parameter created problems * with posix_time::duration_from_string. Templatizing * duration_from_string was not possible at this time, however, * it should be possible in the future (when poor compilers get * fixed or stop being used). * Since this class was designed to use CharT as a parameter it * is simply typedef'd here to ease converting in back to a * parameter the future */ typedef char char_type; typedef typename time_zone_type::base_type time_zone_base_type; typedef typename time_zone_type::time_duration_type time_duration_type; typedef time_zone_names_base<char_type> time_zone_names; typedef boost::date_time::dst_adjustment_offsets<time_duration_type> dst_adjustment_offsets; typedef std::basic_string<char_type> string_type; //! Constructs an empty database tz_db_base() {} //! Process csv data file, may throw exceptions /*! May throw data_not_accessible, or bad_field_count exceptions */ void load_from_file(const std::string& pathspec) { string_type in_str; std::string buff; std::ifstream ifs(pathspec.c_str()); if(!ifs){ boost::throw_exception(data_not_accessible(pathspec)); } std::getline(ifs, buff); // first line is column headings while( std::getline(ifs, buff)) { parse_string(buff); } } //! returns true if record successfully added to map /*! Takes a region name in the form of "America/Phoenix", and a * time_zone object for that region. The id string must be a unique * name that does not already exist in the database. */ bool add_record(const string_type& region, boost::shared_ptr<time_zone_base_type> tz) { typename map_type::value_type p(region, tz); return (m_zone_map.insert(p)).second; } //! Returns a time_zone object built from the specs for the given region /*! Returns a time_zone object built from the specs for the given * region. If region does not exist a local_time::record_not_found * exception will be thrown */ boost::shared_ptr<time_zone_base_type> time_zone_from_region(const string_type& region) const { // get the record typename map_type::const_iterator record = m_zone_map.find(region); if(record == m_zone_map.end()){ return boost::shared_ptr<time_zone_base_type>(); //null pointer } return record->second; } //! Returns a vector of strings holding the time zone regions in the database std::vector<std::string> region_list() const { typedef std::vector<std::string> vector_type; vector_type regions; typename map_type::const_iterator itr = m_zone_map.begin(); while(itr != m_zone_map.end()) { regions.push_back(itr->first); ++itr; } return regions; } private: typedef std::map<string_type, boost::shared_ptr<time_zone_base_type> > map_type; map_type m_zone_map; // start and end rule are of the same type typedef typename rule_type::start_rule::week_num week_num; /* TODO: mechanisms need to be put in place to handle different * types of rule specs. parse_rules() only handles nth_kday * rule types. */ //! parses rule specs for transition day rules rule_type* parse_rules(const string_type& sr, const string_type& er) const { using namespace gregorian; // start and end rule are of the same type, // both are included here for readability typedef typename rule_type::start_rule start_rule; typedef typename rule_type::end_rule end_rule; // these are: [start|end] nth, day, month int s_nth = 0, s_d = 0, s_m = 0; int e_nth = 0, e_d = 0, e_m = 0; split_rule_spec(s_nth, s_d, s_m, sr); split_rule_spec(e_nth, e_d, e_m, er); typename start_rule::week_num s_wn, e_wn; s_wn = get_week_num(s_nth); e_wn = get_week_num(e_nth); return new rule_type(start_rule(s_wn, s_d, s_m), end_rule(e_wn, e_d, e_m)); } //! helper function for parse_rules() week_num get_week_num(int nth) const { typedef typename rule_type::start_rule start_rule; switch(nth){ case 1: return start_rule::first; case 2: return start_rule::second; case 3: return start_rule::third; case 4: return start_rule::fourth; case 5: case -1: return start_rule::fifth; default: // shouldn't get here - add error handling later break; } return start_rule::fifth; // silence warnings } //! splits the [start|end]_date_rule string into 3 ints void split_rule_spec(int& nth, int& d, int& m, string_type rule) const { typedef boost::char_separator<char_type, std::char_traits<char_type> > char_separator_type; typedef boost::tokenizer<char_separator_type, std::basic_string<char_type>::const_iterator, std::basic_string<char_type> > tokenizer; typedef boost::tokenizer<char_separator_type, std::basic_string<char_type>::const_iterator, std::basic_string<char_type> >::iterator tokenizer_iterator; const char_type sep_char[] = { ';', '\0'}; char_separator_type sep(sep_char); tokenizer tokens(rule, sep); // 3 fields tokenizer_iterator tok_iter = tokens.begin(); nth = std::atoi(tok_iter->c_str()); ++tok_iter; d = std::atoi(tok_iter->c_str()); ++tok_iter; m = std::atoi(tok_iter->c_str()); } //! Take a line from the csv, turn it into a time_zone_type. /*! Take a line from the csv, turn it into a time_zone_type, * and add it to the map. Zone_specs in csv file are expected to * have eleven fields that describe the time zone. Returns true if * zone_spec successfully added to database */ bool parse_string(string_type& s) { std::vector<string_type> result; typedef boost::token_iterator_generator<boost::escaped_list_separator<char_type>, string_type::const_iterator, string_type >::type token_iter_type; token_iter_type i = boost::make_token_iterator<string_type>(s.begin(), s.end(),boost::escaped_list_separator<char_type>()); token_iter_type end; while (i != end) { result.push_back(*i); i++; } enum db_fields { ID, STDABBR, STDNAME, DSTABBR, DSTNAME, GMTOFFSET, DSTADJUST, START_DATE_RULE, START_TIME, END_DATE_RULE, END_TIME, FIELD_COUNT }; //take a shot at fixing gcc 4.x error const unsigned int expected_fields = static_cast<unsigned int>(FIELD_COUNT); if (result.size() != expected_fields) { std::ostringstream msg; msg << "Expecting " << FIELD_COUNT << " fields, got " << result.size() << " fields in line: " << s; boost::throw_exception(bad_field_count(msg.str())); BOOST_DATE_TIME_UNREACHABLE_EXPRESSION(return false); // should never reach } // initializations bool has_dst = true; if(result[DSTABBR] == std::string()){ has_dst = false; } // start building components of a time_zone time_zone_names names(result[STDNAME], result[STDABBR], result[DSTNAME], result[DSTABBR]); time_duration_type utc_offset = str_from_delimited_time_duration<time_duration_type,char_type>(result[GMTOFFSET]); dst_adjustment_offsets adjust(time_duration_type(0,0,0), time_duration_type(0,0,0), time_duration_type(0,0,0)); boost::shared_ptr<rule_type> rules; if(has_dst){ adjust = dst_adjustment_offsets( str_from_delimited_time_duration<time_duration_type,char_type>(result[DSTADJUST]), str_from_delimited_time_duration<time_duration_type,char_type>(result[START_TIME]), str_from_delimited_time_duration<time_duration_type,char_type>(result[END_TIME]) ); rules = boost::shared_ptr<rule_type>(parse_rules(result[START_DATE_RULE], result[END_DATE_RULE])); } string_type id(result[ID]); boost::shared_ptr<time_zone_base_type> zone(new time_zone_type(names, utc_offset, adjust, rules)); return (add_record(id, zone)); } }; } } // namespace #endif // DATE_TIME_TZ_DB_BASE_HPP__