#ifndef DATE_TIME_FORMAT_DATE_PARSER_HPP__ #define DATE_TIME_FORMAT_DATE_PARSER_HPP__ /* Copyright (c) 2004-2005 CrystalClear Software, Inc. * Use, modification and distribution is subject to the * Boost Software License, Version 1.0. (See accompanying * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) * Author: Jeff Garland, Bart Garst * $Date: 2008-11-12 14:37:53 -0500 (Wed, 12 Nov 2008) $ */ #include "boost/lexical_cast.hpp" #include "boost/date_time/string_parse_tree.hpp" #include "boost/date_time/strings_from_facet.hpp" #include "boost/date_time/special_values_parser.hpp" #include #include #include #include #ifndef BOOST_NO_STDC_NAMESPACE # include #else # include #endif #ifdef BOOST_NO_STDC_NAMESPACE namespace std { using ::isspace; using ::isdigit; } #endif namespace boost { namespace date_time { //! Helper function for parsing fixed length strings into integers /*! Will consume 'length' number of characters from stream. Consumed * character are transfered to parse_match_result struct. * Returns '-1' if no number can be parsed or incorrect number of * digits in stream. */ template inline int_type fixed_string_to_int(std::istreambuf_iterator& itr, std::istreambuf_iterator& stream_end, parse_match_result& mr, unsigned int length, const charT& fill_char) { //typedef std::basic_string string_type; unsigned int j = 0; //string_type s; while (j < length && itr != stream_end && (std::isdigit(*itr) || *itr == fill_char)) { if(*itr == fill_char) { /* Since a fill_char can be anything, we convert it to a zero. * lexical_cast will behave predictably when zero is used as fill. */ mr.cache += ('0'); } else { mr.cache += (*itr); } itr++; j++; } int_type i = -1; // mr.cache will hold leading zeros. size() tells us when input is too short. if(mr.cache.size() < length) { return i; } try { i = boost::lexical_cast(mr.cache); }catch(bad_lexical_cast&){ // we want to return -1 if the cast fails so nothing to do here } return i; } //! Helper function for parsing fixed length strings into integers /*! Will consume 'length' number of characters from stream. Consumed * character are transfered to parse_match_result struct. * Returns '-1' if no number can be parsed or incorrect number of * digits in stream. */ template inline int_type fixed_string_to_int(std::istreambuf_iterator& itr, std::istreambuf_iterator& stream_end, parse_match_result& mr, unsigned int length) { return fixed_string_to_int(itr, stream_end, mr, length, '0'); } //! Helper function for parsing varied length strings into integers /*! Will consume 'max_length' characters from stream only if those * characters are digits. Returns '-1' if no number can be parsed. * Will not parse a number preceeded by a '+' or '-'. */ template inline int_type var_string_to_int(std::istreambuf_iterator& itr, std::istreambuf_iterator& /* stream_end */, unsigned int max_length) { typedef std::basic_string string_type; unsigned int j = 0; string_type s; while ((j < max_length) && std::isdigit(*itr)) { s += (*itr); itr++; j++; } int_type i = -1; if(s.length() != 0) { i = boost::lexical_cast(s); } return i; } //! Class with generic date parsing using a format string /*! The following is the set of recognized format specifiers - %a - Short weekday name - %A - Long weekday name - %b - Abbreviated month name - %B - Full month name - %d - Day of the month as decimal 01 to 31 - %j - Day of year as decimal from 001 to 366 - %m - Month name as a decimal 01 to 12 - %U - Week number 00 to 53 with first Sunday as the first day of week 1? - %w - Weekday as decimal number 0 to 6 where Sunday == 0 - %W - Week number 00 to 53 where Monday is first day of week 1 - %x - facet default date representation - %y - Year without the century - eg: 04 for 2004 - %Y - Year with century The weekday specifiers (%a and %A) do not add to the date construction, but they provide a way to skip over the weekday names for formats that provide them. todo -- Another interesting feature that this approach could provide is an option to fill in any missing fields with the current values from the clock. So if you have %m-%d the parser would detect the missing year value and fill it in using the clock. todo -- What to do with the %x. %x in the classic facet is just bad... */ template class format_date_parser { public: typedef std::basic_string string_type; typedef std::basic_istringstream stringstream_type; typedef std::istreambuf_iterator stream_itr_type; typedef typename string_type::const_iterator const_itr; typedef typename date_type::year_type year_type; typedef typename date_type::month_type month_type; typedef typename date_type::day_type day_type; typedef typename date_type::duration_type duration_type; typedef typename date_type::day_of_week_type day_of_week_type; typedef typename date_type::day_of_year_type day_of_year_type; typedef string_parse_tree parse_tree_type; typedef typename parse_tree_type::parse_match_result_type match_results; typedef std::vector > input_collection_type; // TODO sv_parser uses its default constructor - write the others format_date_parser(const string_type& format_str, const input_collection_type& month_short_names, const input_collection_type& month_long_names, const input_collection_type& weekday_short_names, const input_collection_type& weekday_long_names) : m_format(format_str), m_month_short_names(month_short_names, 1), m_month_long_names(month_long_names, 1), m_weekday_short_names(weekday_short_names), m_weekday_long_names(weekday_long_names) {} format_date_parser(const string_type& format_str, const std::locale& locale) : m_format(format_str), m_month_short_names(gather_month_strings(locale), 1), m_month_long_names(gather_month_strings(locale, false), 1), m_weekday_short_names(gather_weekday_strings(locale)), m_weekday_long_names(gather_weekday_strings(locale, false)) {} format_date_parser(const format_date_parser& fdp) { this->m_format = fdp.m_format; this->m_month_short_names = fdp.m_month_short_names; this->m_month_long_names = fdp.m_month_long_names; this->m_weekday_short_names = fdp.m_weekday_short_names; this->m_weekday_long_names = fdp.m_weekday_long_names; } string_type format() const { return m_format; } void format(string_type format_str) { m_format = format_str; } void short_month_names(const input_collection_type& month_names) { m_month_short_names = parse_tree_type(month_names, 1); } void long_month_names(const input_collection_type& month_names) { m_month_long_names = parse_tree_type(month_names, 1); } void short_weekday_names(const input_collection_type& weekday_names) { m_weekday_short_names = parse_tree_type(weekday_names); } void long_weekday_names(const input_collection_type& weekday_names) { m_weekday_long_names = parse_tree_type(weekday_names); } date_type parse_date(const string_type& value, const string_type& format_str, const special_values_parser& sv_parser) const { stringstream_type ss(value); stream_itr_type sitr(ss); stream_itr_type stream_end; return parse_date(sitr, stream_end, format_str, sv_parser); } date_type parse_date(std::istreambuf_iterator& sitr, std::istreambuf_iterator& stream_end, const special_values_parser& sv_parser) const { return parse_date(sitr, stream_end, m_format, sv_parser); } /*! Of all the objects that the format_date_parser can parse, only a * date can be a special value. Therefore, only parse_date checks * for special_values. */ date_type parse_date(std::istreambuf_iterator& sitr, std::istreambuf_iterator& stream_end, string_type format_str, const special_values_parser& sv_parser) const { bool use_current_char = false; // skip leading whitespace while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } charT current_char = *sitr; short year(0), month(0), day(0), day_of_year(0);// wkday(0); /* Initialized the following to their minimum values. These intermediate * objects are used so we get specific exceptions when part of the input * is unparsable. * Ex: "205-Jan-15" will throw a bad_year, "2005-Jsn-15"- bad_month, etc.*/ year_type t_year(1400); month_type t_month(1); day_type t_day(1); day_of_week_type wkday(0); const_itr itr(format_str.begin()); while (itr != format_str.end() && (sitr != stream_end)) { if (*itr == '%') { itr++; if (*itr != '%') { switch(*itr) { case 'a': { //this value is just throw away. It could be used for //error checking potentially, but it isn't helpful in //actually constructing the date - we just need to get it //out of the stream match_results mr = m_weekday_short_names.match(sitr, stream_end); if(mr.current_match == match_results::PARSE_ERROR) { // check special_values if(sv_parser.match(sitr, stream_end, mr)) { return date_type(static_cast(mr.current_match)); } } wkday = mr.current_match; if (mr.has_remaining()) { current_char = mr.last_char(); use_current_char = true; } break; } case 'A': { //this value is just throw away. It could be used for //error checking potentially, but it isn't helpful in //actually constructing the date - we just need to get it //out of the stream match_results mr = m_weekday_long_names.match(sitr, stream_end); if(mr.current_match == match_results::PARSE_ERROR) { // check special_values if(sv_parser.match(sitr, stream_end, mr)) { return date_type(static_cast(mr.current_match)); } } wkday = mr.current_match; if (mr.has_remaining()) { current_char = mr.last_char(); use_current_char = true; } break; } case 'b': { match_results mr = m_month_short_names.match(sitr, stream_end); if(mr.current_match == match_results::PARSE_ERROR) { // check special_values if(sv_parser.match(sitr, stream_end, mr)) { return date_type(static_cast(mr.current_match)); } } t_month = month_type(mr.current_match); if (mr.has_remaining()) { current_char = mr.last_char(); use_current_char = true; } break; } case 'B': { match_results mr = m_month_long_names.match(sitr, stream_end); if(mr.current_match == match_results::PARSE_ERROR) { // check special_values if(sv_parser.match(sitr, stream_end, mr)) { return date_type(static_cast(mr.current_match)); } } t_month = month_type(mr.current_match); if (mr.has_remaining()) { current_char = mr.last_char(); use_current_char = true; } break; } case 'd': { match_results mr; day = fixed_string_to_int(sitr, stream_end, mr, 2); if(day == -1) { if(sv_parser.match(sitr, stream_end, mr)) { return date_type(static_cast(mr.current_match)); } } t_day = day_type(day); break; } case 'e': { match_results mr; day = fixed_string_to_int(sitr, stream_end, mr, 2, ' '); if(day == -1) { if(sv_parser.match(sitr, stream_end, mr)) { return date_type(static_cast(mr.current_match)); } } t_day = day_type(day); break; } case 'j': { match_results mr; day_of_year = fixed_string_to_int(sitr, stream_end, mr, 3); if(day_of_year == -1) { if(sv_parser.match(sitr, stream_end, mr)) { return date_type(static_cast(mr.current_match)); } } // these next two lines are so we get an exception with bad input day_of_year_type t_day_of_year(1); t_day_of_year = day_of_year_type(day_of_year); break; } case 'm': { match_results mr; month = fixed_string_to_int(sitr, stream_end, mr, 2); if(month == -1) { if(sv_parser.match(sitr, stream_end, mr)) { return date_type(static_cast(mr.current_match)); } } t_month = month_type(month); break; } case 'Y': { match_results mr; year = fixed_string_to_int(sitr, stream_end, mr, 4); if(year == -1) { if(sv_parser.match(sitr, stream_end, mr)) { return date_type(static_cast(mr.current_match)); } } t_year = year_type(year); break; } case 'y': { match_results mr; year = fixed_string_to_int(sitr, stream_end, mr, 2); if(year == -1) { if(sv_parser.match(sitr, stream_end, mr)) { return date_type(static_cast(mr.current_match)); } } year += 2000; //make 2 digit years in this century t_year = year_type(year); break; } default: {} //ignore those we don't understand }//switch } else { // itr == '%', second consecutive sitr++; } itr++; //advance past format specifier } else { //skip past chars in format and in buffer itr++; if (use_current_char) { use_current_char = false; current_char = *sitr; } else { sitr++; } } } if (day_of_year > 0) { date_type d(static_cast(year-1),12,31); //end of prior year return d + duration_type(day_of_year); } return date_type(t_year, t_month, t_day); // exceptions were thrown earlier // if input was no good } //! Throws bad_month if unable to parse month_type parse_month(std::istreambuf_iterator& sitr, std::istreambuf_iterator& stream_end, string_type format_str) const { match_results mr; return parse_month(sitr, stream_end, format_str, mr); } //! Throws bad_month if unable to parse month_type parse_month(std::istreambuf_iterator& sitr, std::istreambuf_iterator& stream_end, string_type format_str, match_results& mr) const { bool use_current_char = false; // skip leading whitespace while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } charT current_char = *sitr; short month(0); const_itr itr(format_str.begin()); while (itr != format_str.end() && (sitr != stream_end)) { if (*itr == '%') { itr++; if (*itr != '%') { switch(*itr) { case 'b': { mr = m_month_short_names.match(sitr, stream_end); month = mr.current_match; if (mr.has_remaining()) { current_char = mr.last_char(); use_current_char = true; } break; } case 'B': { mr = m_month_long_names.match(sitr, stream_end); month = mr.current_match; if (mr.has_remaining()) { current_char = mr.last_char(); use_current_char = true; } break; } case 'm': { month = var_string_to_int(sitr, stream_end, 2); // var_string_to_int returns -1 if parse failed. That will // cause a bad_month exception to be thrown so we do nothing here break; } default: {} //ignore those we don't understand }//switch } else { // itr == '%', second consecutive sitr++; } itr++; //advance past format specifier } else { //skip past chars in format and in buffer itr++; if (use_current_char) { use_current_char = false; current_char = *sitr; } else { sitr++; } } } return month_type(month); // throws bad_month exception when values are zero } //! Expects 1 or 2 digits 1-31. Throws bad_day_of_month if unable to parse day_type parse_var_day_of_month(std::istreambuf_iterator& sitr, std::istreambuf_iterator& stream_end) const { // skip leading whitespace while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } return day_type(var_string_to_int(sitr, stream_end, 2)); } //! Expects 2 digits 01-31. Throws bad_day_of_month if unable to parse day_type parse_day_of_month(std::istreambuf_iterator& sitr, std::istreambuf_iterator& stream_end) const { // skip leading whitespace while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } //return day_type(var_string_to_int(sitr, stream_end, 2)); match_results mr; return day_type(fixed_string_to_int(sitr, stream_end, mr, 2)); } day_of_week_type parse_weekday(std::istreambuf_iterator& sitr, std::istreambuf_iterator& stream_end, string_type format_str) const { match_results mr; return parse_weekday(sitr, stream_end, format_str, mr); } day_of_week_type parse_weekday(std::istreambuf_iterator& sitr, std::istreambuf_iterator& stream_end, string_type format_str, match_results& mr) const { bool use_current_char = false; // skip leading whitespace while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } charT current_char = *sitr; short wkday(0); const_itr itr(format_str.begin()); while (itr != format_str.end() && (sitr != stream_end)) { if (*itr == '%') { itr++; if (*itr != '%') { switch(*itr) { case 'a': { //this value is just throw away. It could be used for //error checking potentially, but it isn't helpful in //actually constructing the date - we just need to get it //out of the stream mr = m_weekday_short_names.match(sitr, stream_end); wkday = mr.current_match; if (mr.has_remaining()) { current_char = mr.last_char(); use_current_char = true; } break; } case 'A': { //this value is just throw away. It could be used for //error checking potentially, but it isn't helpful in //actually constructing the date - we just need to get it //out of the stream mr = m_weekday_long_names.match(sitr, stream_end); wkday = mr.current_match; if (mr.has_remaining()) { current_char = mr.last_char(); use_current_char = true; } break; } case 'w': { // weekday as number 0-6, Sunday == 0 wkday = var_string_to_int(sitr, stream_end, 2); break; } default: {} //ignore those we don't understand }//switch } else { // itr == '%', second consecutive sitr++; } itr++; //advance past format specifier } else { //skip past chars in format and in buffer itr++; if (use_current_char) { use_current_char = false; current_char = *sitr; } else { sitr++; } } } return day_of_week_type(wkday); // throws bad_day_of_month exception // when values are zero } //! throws bad_year if unable to parse year_type parse_year(std::istreambuf_iterator& sitr, std::istreambuf_iterator& stream_end, string_type format_str) const { match_results mr; return parse_year(sitr, stream_end, format_str, mr); } //! throws bad_year if unable to parse year_type parse_year(std::istreambuf_iterator& sitr, std::istreambuf_iterator& stream_end, string_type format_str, match_results& mr) const { bool use_current_char = false; // skip leading whitespace while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; } charT current_char = *sitr; unsigned short year(0); const_itr itr(format_str.begin()); while (itr != format_str.end() && (sitr != stream_end)) { if (*itr == '%') { itr++; if (*itr != '%') { //match_results mr; switch(*itr) { case 'Y': { // year from 4 digit string year = fixed_string_to_int(sitr, stream_end, mr, 4); break; } case 'y': { // year from 2 digit string (no century) year = fixed_string_to_int(sitr, stream_end, mr, 2); year += 2000; //make 2 digit years in this century break; } default: {} //ignore those we don't understand }//switch } else { // itr == '%', second consecutive sitr++; } itr++; //advance past format specifier } else { //skip past chars in format and in buffer itr++; if (use_current_char) { use_current_char = false; current_char = *sitr; } else { sitr++; } } } return year_type(year); // throws bad_year exception when values are zero } private: string_type m_format; parse_tree_type m_month_short_names; parse_tree_type m_month_long_names; parse_tree_type m_weekday_short_names; parse_tree_type m_weekday_long_names; }; } } //namespace #endif