Main Page | Class Hierarchy | Class List | Directories | File List | Class Members

regexp_parser.h

00001 /***************************************************************************
00002  *   Copyright (C) 2004-2006 by Radko Mihal                                *
00003  *   rmihal@pobox.sk                                                       *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU General Public License as published by  *
00007  *   the Free Software Foundation; either version 2 of the License, or     *
00008  *   (at your option) any later version.                                   *
00009  *                                                                         *
00010  *   This program is distributed in the hope that it will be useful,       *
00011  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00012  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
00013  *   GNU General Public License for more details.                          *
00014  *                                                                         *
00015  *   You should have received a copy of the GNU General Public License     *
00016  *   along with this program; if not, write to the                         *
00017  *   Free Software Foundation, Inc.,                                       *
00018  *   51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.              *
00019  ***************************************************************************/
00020 #ifndef GRAMMAR_TO_PARSERREGEXP_PARSER_H
00021 #define GRAMMAR_TO_PARSERREGEXP_PARSER_H
00022 
00023 #include "extended_regular_expression.h"
00024 
00025 namespace grammar_to_parser {
00026 
/// Parser name that basic_regexp_parser hands to its basic_object_parser base.
/// The pointer itself is const so that this header-level definition has
/// internal linkage; a non-const pointer would be defined once per including
/// translation unit and clash at link time.
const char* const REGEXP_PARSER = "regexpParser";
00028 
00038 template< typename E >
00039 class basic_regexp_parser : public basic_object_parser< E, std::basic_string<E> >
00040 {
00041 public:
00042         basic_regexp_parser( std::basic_string<E> regular_expression ) : 
00043                 basic_object_parser< E, std::basic_string<E> >(REGEXP_PARSER),
00044                 m_regular_expression( regular_expression ) {};
00045 
00046         ~basic_regexp_parser() {};
00047         
00049         bool is_pattern_valid()
00050         {
00051                 if( !m_regexp.is_parsed() )
00052                 {
00053                         m_regexp.parse( m_regular_expression.c_str(), 
00054                                                         m_regular_expression.length() );
00055                 }
00056                 return m_regexp.is_parsed();
00057         }
00058                 
00060 
00066         virtual unsigned long parse( const E *buf, const unsigned long buf_length )
00067         {
00068                 prepare_for_parsing();
00069                 
00070                 if( is_valid() )
00071                 {
00072                          m_parsed_size = m_regexp->recognize( buf, buf_length );
00073                 }
00074                 else
00075                 {
00076                         return 0;
00077                 }
00078                 if( m_regexp->is_recognized() )
00079                 {
00080                         std_auto_ptr::operator = 
00081                                 ( std_auto_ptr( new std::basic_string<E>( buf, m_parsed_size ) ) );
00082                         m_is_parsed = true;
00083                 }
00084                 return m_parsed_size;
00085         }
00086         
00088 
00093         virtual std::basic_istream<E>& parse( std::basic_istream<E>& is )
00094         {
00095                 prepare_for_parsing();
00096 
00097                 // TODO - implement to pass istream to regexp - now limited to input in one line
00098                 std::basic_stringbuf<E> buf;
00099                 std::streampos pos = is.tellg();
00100                 if( is.good() )
00101                 {
00102                         is.get( buf );
00103                         unsigned long length = buf.str().length();
00104                         if( is_valid() )
00105                         {
00106                                  m_regexp->recognize( buf.str().c_str(), length );
00107                         }
00108                         else
00109                         {
00110                                 return is;
00111                         }
00112                         if( m_regexp->is_recognized() )
00113                         {
00114                                 m_parsed_size = m_regexp->recognized_position() + m_regexp->recognized_size();
00115                                 std_auto_ptr::operator = 
00116                                         ( std_auto_ptr( new std::basic_string<E>( buf.str().c_str(), m_parsed_size ) ) );
00117                                 m_is_parsed = true;
00118                                 pos += m_parsed_size;
00119                                 is.clear();
00120                                 is.seekg(pos);
00121                         }
00122                         else
00123                         {
00124                                 is.clear();
00125                                 is.seekg(pos);
00126                         }
00127                 }
00128                 return is;
00129         }
00130         
00132         unsigned long recognized_position()
00133         {
00134                 if( is_pattern_valid() && m_regexp->is_recognized() )
00135                 {
00136                         return m_regexp->recognized_position();
00137                 }
00138                 else
00139                 {
00140                         return 0;
00141                 }
00142         }
00143 
00145         unsigned long recognized_size()
00146         {
00147                 if( is_pattern_valid() && m_regexp->is_recognized() )
00148                 {
00149                         return m_regexp->recognized_size();
00150                 }
00151                 else
00152                 {
00153                         return 0;
00154                 }
00155         }
00156         
00158 
00181         void assign_matches( extended_regular_expression::matches& m )
00182         {
00183                 if( is_pattern_valid() && m_regexp->is_recognized() )
00184                 {
00185                         m_regexp->assign_matches(m);
00186                 }
00187         }
00188         
00190         virtual std::basic_ostream<E>& format( std::basic_ostream<E> &os )
00191         {
00192                 os << get_valid();
00193                 return os;
00194         }
00195         
00197         operator std::basic_string<E>()
00198         {
00199         return get_valid();
00200         }       
00201         
00202 protected:
00203         std::basic_string<E>                                                            m_regular_expression;
00204         basic_non_terminal< E, extended_regular_expression::ere<E> >    m_regexp;
00205 
00206 protected:
00207     void prepare_for_parsing()
00208         {
00209                 basic_object_parser< E,std::basic_string<E> >::prepare_for_parsing();
00210                 if( !m_regexp.is_parsed() )
00211                 {
00212                         m_regexp.parse( m_regular_expression.c_str(), 
00213                                                         m_regular_expression.length() );
00214                 }
00215         }
00216         
00217 };
00218 
00219 typedef basic_regexp_parser<char> regexp_parser;
00220 typedef basic_regexp_parser<wchar_t> wregexp_parser;
00221 
00222 };
00223 
00224 #endif

Generated on Sun Jul 2 18:39:43 2006 for grammar2parser.kdevelop by  doxygen 1.4.1