00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef GRAMMAR_TO_PARSERBASIC_STRING_PARSER_H
00021 #define GRAMMAR_TO_PARSERBASIC_STRING_PARSER_H
00022
00023 #include <sstream>
00024
00025 namespace grammar_to_parser {
00026
00027 template<typename E>
00028 class basic_string_parser :
00029 public basic_value_parser< E, std::basic_string<E> >
00030 {
00031 public:
00032 typedef std::basic_string<E> std_string;
00033 enum white_chars
00034 {
00035 NEW_LINE= '\n',
00036 SPACE = ' ',
00037 TAB = '\t',
00038 CARRIAGE_RETURN = 13,
00039 LINE_FEED = 10
00040 };
00041
00042 enum directives
00043 {
00044 NONE = 0,
00045 NO_NEW_LINES = 1,
00046 NO_SPACES = 2,
00047 NO_TABS = 4,
00048 NO_CARRIAGE_RETURNS = 8,
00049 NO_LINE_FEEDS = 16
00050 };
00051
00052 enum string_type
00053 {
00054 STRING = NONE,
00055 NORMALIZED_STRING = NO_CARRIAGE_RETURNS |
00056 NO_LINE_FEEDS |
00057 NO_TABS,
00058 TOKEN = NO_SPACES |
00059 NO_LINE_FEEDS |
00060 NO_TABS,
00061 NMTOKEN = NO_SPACES |
00062 NO_LINE_FEEDS |
00063 NO_TABS
00064 };
00065
00066 basic_string_parser();
00067 basic_string_parser(const E separator, int a_directives);
00068 ~basic_string_parser();
00069
00070 unsigned long parse( const E* buf, const unsigned long buf_length );
00071 std::basic_istream<E>& parse( std::basic_istream<E>& is );
00072 operator std::basic_string<E>() { return get_valid(); }
00073
00074 protected:
00075 E m_separator;
00076 int m_directives;
00077
00078 };
00079
00080 template< typename E >
00081 basic_string_parser<E>::basic_string_parser() :
00082 basic_value_parser< E, std::basic_string<E> >(),
00083 m_separator(NEW_LINE),
00084 m_directives(NO_NEW_LINES)
00085 {
00086 }
00087
00088 template< typename E >
00089 basic_string_parser<E>::basic_string_parser(const E separator, int a_directives) :
00090 basic_value_parser< E, std::basic_string<E> >(),
00091 m_separator(separator),
00092 m_directives(a_directives)
00093 {
00094 }
00095
00096 template< typename E >
00097 basic_string_parser<E>::~basic_string_parser()
00098 {
00099 }
00100
00101 template< typename E >
00102 unsigned long basic_string_parser<E>::parse( const E* buf,
00103 const unsigned long buf_length )
00104 {
00105 prepare_for_parsing();
00106
00107 m_is_parsed = true;
00108 int i = 0;
00109 for( i=0; i<buf_length && buf[i] != m_separator; i++ )
00110 {
00111 if( m_directives & NO_NEW_LINES && buf[i] == NEW_LINE ||
00112 m_directives & NO_SPACES && buf[i] == SPACE ||
00113 m_directives & NO_TABS && buf[i] == TAB ||
00114 m_directives & NO_CARRIAGE_RETURNS && buf[i] == CARRIAGE_RETURN ||
00115 m_directives & NO_LINE_FEEDS && buf[i] == LINE_FEED )
00116 {
00117 m_is_parsed = false;
00118 break;
00119 }
00120
00121 };
00122
00123 if( m_is_parsed )
00124 {
00125 get_valid() = std::basic_string<E>( buf, i );
00126 m_parsed_size = get_valid().length();
00127 }
00128 return m_parsed_size;
00129 }
00130
00131 template< typename E >
00132 std::basic_istream<E>& basic_string_parser<E>::parse( std::basic_istream<E>& is )
00133 {
00134 prepare_for_parsing();
00135
00136 std::streampos str_pos = is.tellg();
00137 std::basic_stringbuf<E> buf;
00138 is.get( buf, m_separator );
00139 typename std_string::iterator it = buf.str().begin();
00140
00141 m_is_parsed = true;
00142 E input = 0;
00143 while( it != buf.str().end() )
00144 {
00145 input = (*it);
00146 if( m_directives & NO_NEW_LINES && input == NEW_LINE ||
00147 m_directives & NO_SPACES && input == SPACE ||
00148 m_directives & NO_TABS && input == TAB ||
00149 m_directives & NO_CARRIAGE_RETURNS && input == CARRIAGE_RETURN ||
00150 m_directives & NO_LINE_FEEDS && input == LINE_FEED )
00151 {
00152 m_is_parsed = false;
00153 break;
00154 }
00155 it++;
00156 };
00157
00158 if( m_is_parsed )
00159 {
00160 get_valid() = buf.str();
00161 m_parsed_size = get_valid().length();
00162 }
00163 else
00164 {
00165 is.clear();
00166 is.seekg(str_pos);
00167 }
00168 return is;
00169 }
00170
00171 typedef basic_string_parser<char> string_parser;
00172 typedef basic_string_parser<wchar_t> wstring_parser;
00173
00174 };
00175
00176 #endif