Main Page | Class Hierarchy | Class List | File List | Class Members

xmlLexer.h

00001 /*********************************************************************
00002  *
00003  * Condor ClassAd library
00004  * Copyright (C) 1990-2003, Condor Team, Computer Sciences Department,
00005  * University of Wisconsin-Madison, WI and Rajesh Raman.
00006  *
00007  * This source code is covered by the Condor Public License, which can
00008  * be found in the accompanying LICENSE file, or online at
00009  * www.condorproject.org.
00010  *
00011  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00012  * AND THE UNIVERSITY OF WISCONSIN-MADISON "AS IS" AND ANY EXPRESS OR
00013  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00014  * WARRANTIES OF MERCHANTABILITY, OF SATISFACTORY QUALITY, AND FITNESS
00015  * FOR A PARTICULAR PURPOSE OR USE ARE DISCLAIMED. THE COPYRIGHT
00016  * HOLDERS AND CONTRIBUTORS AND THE UNIVERSITY OF WISCONSIN-MADISON
00017  * MAKE NO MAKE NO REPRESENTATION THAT THE SOFTWARE, MODIFICATIONS,
00018  * ENHANCEMENTS OR DERIVATIVE WORKS THEREOF, WILL NOT INFRINGE ANY
00019  * PATENT, COPYRIGHT, TRADEMARK, TRADE SECRET OR OTHER PROPRIETARY
00020  * RIGHT.
00021  *
00022  *********************************************************************/
00023 
00024 #ifndef __XMLLEXER_H__
00025 #define __XMLLEXER_H__
00026 
00027 #include "common.h"
00028 #include "lexerSource.h"
00029 #include <map>
00030 
00031 BEGIN_NAMESPACE( classad )
00032 
// Attribute storage for an XML tag: a string-to-string map, plus the
// matching iterator type derived from the map typedef itself.
typedef std::map<std::string, std::string> XMLAttributes;
typedef XMLAttributes::iterator            XMLAttributesIterator;
00035 
00036 // the lexical analyzer class
00037 class XMLLexer
00038 {
00039  public:
00040         enum TokenType
00041         {
00042                 tokenType_Tag,
00043                 tokenType_Text,
00044                 tokenType_Invalid
00045         };
00046         enum TagType
00047         {
00048                 tagType_Start, // This is for tags like <foo>
00049                 tagType_End,   // This is for tags like </foo>
00050                 tagType_Empty, // This is for tags like <foo/>
00051                 tagType_Invalid
00052         };
00053         enum TagID
00054         {
00055                 tagID_ClassAds,
00056                 tagID_ClassAd,
00057                 tagID_Attribute,
00058                 tagID_Integer,
00059                 tagID_Real,
00060                 tagID_String,
00061                 tagID_Bool,
00062                 tagID_Undefined,
00063                 tagID_Error,
00064                 tagID_AbsoluteTime,
00065                 tagID_RelativeTime,
00066                 tagID_List,
00067                 tagID_Expr,
00068                 tagID_XML,
00069                 tagID_XMLStylesheet,
00070                 tagID_Doctype,
00071                 tagID_NoTag
00072         };
00073         
00074         class Token
00075         {
00076         public:
00077                 Token();
00078                 ~Token();
00079                 void ClearToken(void);
00080                 void DumpToken(void);
00081 
00082                 TokenType      token_type;
00083                 TagType        tag_type;
00084                 TagID          tag_id;
00085                 std::string    text;
00086                 XMLAttributes  attributes;
00087         };
00088 
00089         XMLLexer ();
00090         ~XMLLexer ();
00091 
00092         void SetLexerSource(LexerSource *source);
00093 
00094         // the 'extract token' functions
00095         bool PeekToken(Token* token);
00096         bool ConsumeToken(Token *token);
00097 
00098  private:
00099         Token       current_token;
00100         bool        token_is_valid;
00101         LexerSource *lexer_source;
00102 
00103         bool GrabToken(void);
00104         bool GrabTag(void);
00105         void BreakdownTag(const char *complete_tag);
00106         bool GrabText(void);
00107  private:
00108     
00109     // The copy constructor and assignment operator are defined
00110     // to be private so we don't have to write them, or worry about
00111     // them being inappropriately used. The day we want them, we can 
00112     // write them. 
00113     XMLLexer(const XMLLexer &lexer)            { return;       }
00114     XMLLexer &operator=(const XMLLexer &lexer) { return *this; }
00115 };
00116 
00117 struct xml_tag_mapping
00118 {
00119         char             *tag_name;
00120         XMLLexer::TagID  id;
00121 };
00122 
00123 #define NUMBER_OF_TAG_MAPPINGS (sizeof(tag_mappings)/sizeof(struct xml_tag_mapping))
00124 extern struct xml_tag_mapping tag_mappings[];
00125 
00126 
00127 END_NAMESPACE // classad
00128 
00129 #endif //__XMLLEXER_H__