Main Page | Class Hierarchy | Class List | File List | Class Members

lexer.h

00001 /*********************************************************************
00002  *
00003  * Condor ClassAd library
00004  * Copyright (C) 1990-2003, Condor Team, Computer Sciences Department,
00005  * University of Wisconsin-Madison, WI and Rajesh Raman.
00006  *
00007  * This source code is covered by the Condor Public License, which can
00008  * be found in the accompanying LICENSE file, or online at
00009  * www.condorproject.org.
00010  *
00011  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00012  * AND THE UNIVERSITY OF WISCONSIN-MADISON "AS IS" AND ANY EXPRESS OR
00013  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00014  * WARRANTIES OF MERCHANTABILITY, OF SATISFACTORY QUALITY, AND FITNESS
00015  * FOR A PARTICULAR PURPOSE OR USE ARE DISCLAIMED. THE COPYRIGHT
00016  * HOLDERS AND CONTRIBUTORS AND THE UNIVERSITY OF WISCONSIN-MADISON
00017  * MAKE NO MAKE NO REPRESENTATION THAT THE SOFTWARE, MODIFICATIONS,
00018  * ENHANCEMENTS OR DERIVATIVE WORKS THEREOF, WILL NOT INFRINGE ANY
00019  * PATENT, COPYRIGHT, TRADEMARK, TRADE SECRET OR OTHER PROPRIETARY
00020  * RIGHT.
00021  *
00022  *********************************************************************/
00023 
00024 #ifndef __LEXER_H__
00025 #define __LEXER_H__
00026 
00027 #include "common.h"
00028 #include "value.h"
00029 #include "lexerSource.h"
00030 
00031 BEGIN_NAMESPACE( classad )
00032 
00033 
00034 // the lexical analyzer class
00035 class Lexer
00036 {
00037         public:
00038                 enum TokenType
00039                 {
00040                         LEX_TOKEN_ERROR,
00041                         LEX_END_OF_INPUT,
00042                         LEX_TOKEN_TOO_LONG,
00043                         LEX_INTEGER_VALUE,
00044                         LEX_REAL_VALUE,
00045                         LEX_BOOLEAN_VALUE,
00046                         LEX_STRING_VALUE,
00047                         LEX_UNDEFINED_VALUE,
00048                         LEX_ERROR_VALUE,
00049                         LEX_IDENTIFIER,
00050                         LEX_SELECTION,
00051                         LEX_MULTIPLY,
00052                         LEX_DIVIDE,
00053                         LEX_MODULUS,
00054                         LEX_PLUS,
00055                         LEX_MINUS,
00056                         LEX_BITWISE_AND,
00057                         LEX_BITWISE_OR,
00058                         LEX_BITWISE_NOT,
00059                         LEX_BITWISE_XOR,
00060                         LEX_LEFT_SHIFT,
00061                         LEX_RIGHT_SHIFT,
00062                         LEX_URIGHT_SHIFT,
00063                         LEX_LOGICAL_AND,
00064                         LEX_LOGICAL_OR,
00065                         LEX_LOGICAL_NOT,
00066                         LEX_LESS_THAN,
00067                         LEX_LESS_OR_EQUAL,
00068                         LEX_GREATER_THAN,
00069                         LEX_GREATER_OR_EQUAL,
00070                         LEX_EQUAL,
00071                         LEX_NOT_EQUAL,
00072                         LEX_META_EQUAL,
00073                         LEX_META_NOT_EQUAL,
00074                         LEX_BOUND_TO,
00075                         LEX_QMARK,
00076                         LEX_COLON,
00077                         LEX_COMMA,
00078                         LEX_SEMICOLON,
00079                         LEX_OPEN_BOX,
00080                         LEX_CLOSE_BOX,
00081                         LEX_OPEN_PAREN,
00082                         LEX_CLOSE_PAREN,
00083                         LEX_OPEN_BRACE,
00084                         LEX_CLOSE_BRACE,
00085                         LEX_BACKSLASH,
00086                         LEX_ABSOLUTE_TIME_VALUE,
00087                         LEX_RELATIVE_TIME_VALUE
00088                 };
00089 
00090                 class TokenValue
00091                 {
00092                         public:
00093                                 TokenValue( ) {
00094                                         tt                   = LEX_TOKEN_ERROR;
00095                                         factor               = Value::NO_FACTOR;
00096                                         intValue             = 0;
00097                                         realValue            = 0.0;
00098                                         boolValue            = false;
00099                                         relative_secs        = 0;
00100                                         absolute_secs.secs   = 0;
00101                                         absolute_secs.offset = 0;
00102                                 }
00103 
00104                                 ~TokenValue( ) {
00105                                 }
00106 
00107                                 void SetTokenType( TokenType t ) {
00108                                         tt = t;
00109                                 }
00110 
00111                                 void SetIntValue( int i, Value::NumberFactor f) {
00112                                         intValue = i;
00113                                         factor = f;
00114                                 }
00115 
00116                                 void SetRealValue( double r, Value::NumberFactor f ) {
00117                                         realValue = r;
00118                                         factor = f;
00119                                 }
00120 
00121                                 void SetBoolValue( bool b ) {
00122                                         boolValue = b;
00123                                 }
00124 
00125                                 void SetStringValue( const std::string &str) {
00126                                         strValue = str;
00127                                 }
00128 
00129                                 void SetAbsTimeValue( abstime_t asecs ) {
00130                                         absolute_secs = asecs;
00131                                 }
00132 
00133                                 void SetRelTimeValue( double rsecs ) {
00134                                         relative_secs = rsecs;
00135                                 }
00136 
00137                                 TokenType GetTokenType( ) {
00138                                         return tt;
00139                                 }
00140 
00141                                 void GetIntValue( int& i, Value::NumberFactor& f) {
00142                                         i = intValue;
00143                                         f = factor;
00144                                 }
00145 
00146                                 void GetRealValue( double& r, Value::NumberFactor& f ) {
00147                                         r = realValue;
00148                                         f = factor;
00149                                 }
00150 
00151                                 void GetBoolValue( bool& b ) {
00152                                         b = boolValue;
00153                                 }
00154 
00155                                 void GetStringValue( std::string &str ) {
00156                                         str = strValue; 
00157                                 }       
00158 
00159                                 void GetAbsTimeValue( abstime_t& asecs ) {
00160                                         asecs = absolute_secs;
00161                                 }
00162 
00163                                 void GetRelTimeValue( double& rsecs ) {
00164                                         rsecs = relative_secs;
00165                                 }
00166 
00167                                 void CopyFrom( TokenValue &tv ) {
00168                                         tt = tv.tt;
00169                                         factor = tv.factor;
00170                                         intValue = tv.intValue;
00171                                         realValue = tv.realValue;
00172                                         boolValue = tv.boolValue;
00173                                         relative_secs = tv.relative_secs;
00174                                         absolute_secs = tv.absolute_secs;
00175                                         strValue = tv.strValue;
00176                                 }
00177                                         
00178                         private:
00179                                 TokenType                       tt;
00180                                 Value::NumberFactor factor;
00181                                 int                             intValue;
00182                                 double                          realValue;
00183                                 bool                            boolValue;
00184                                 std::string                     strValue;
00185                                 double                          relative_secs;
00186                                 abstime_t           absolute_secs;
00187                 };
00188 
00189                 // ctor/dtor
00190                 Lexer ();
00191                 ~Lexer ();
00192 
00193                 // initialize methods
00194                 bool Initialize(LexerSource *source);
00195                 bool Reinitialize(void);
00196         
00197         bool WasInitialized(void);
00198 
00199                 // cleanup function --- purges strings from string space
00200                 void FinishedParse();
00201                 
00202                 // the 'extract token' functions
00203                 TokenType PeekToken( TokenValue* = 0 );
00204                 TokenType ConsumeToken( TokenValue* = 0 );
00205 
00206                 // internal buffer for token accumulation
00207                 std::string lexBuffer;                                      // the buffer itselfw
00208 
00209                 // miscellaneous functions
00210                 static char *strLexToken (int);                         // string rep'n of token
00211 
00212                 // set debug flag 
00213                 void SetDebug( bool d ) { debug = d; }
00214 
00215         private:
00216                         // grant access to FunctionCall --- for tokenize{Abs,Rel}Time fns
00217                 friend class FunctionCall;
00218                 friend class ClassAdXMLParser;
00219 
00220         // The copy constructor and assignment operator are defined
00221         // to be private so we don't have to write them, or worry about
00222         // them being inappropriately used. The day we want them, we can 
00223         // write them. 
00224         Lexer(const Lexer &lexer)            { return;       }
00225         Lexer &operator=(const Lexer &lexer) { return *this; }
00226 
00227                 // internal state of lexical analyzer
00228         bool        initialized;
00229                 TokenType       tokenType;                      // the integer id of the token
00230                 LexerSource *lexSource;
00231                 int             markedPos;                      // index of marked character
00232                 char            savedChar;                      // stores character when cut
00233                 int             ch;                             // the current character
00234                 int                     lexBufferCount;                         // current offset in lexBuffer
00235                 bool            inString;                                       // lexing a string constant
00236                 bool            accumulating;                           // are we in a token?
00237                 int             debug;                                          // debug flag
00238 
00239                 // cached last token
00240                 TokenValue      yylval;                                         // the token itself
00241                 bool            tokenConsumed;                          // has the token been consumed?
00242 
00243                 // internal lexing functions
00244                 void            wind(void);                                     // consume character from source
00245                 void            mark(void);                                     // mark()s beginning of a token
00246                 void            cut(void);                                      // delimits token
00247 
00248                 // to tokenize the various tokens
00249                 int             tokenizeNumber (void);          // integer or real
00250                 int             tokenizeAlphaHead (void);       // identifiers/reserved strings
00251                 int             tokenizePunctOperator(void);// punctuation and operators
00252                 int         tokenizeString(char delim);//string constants
00253 };
00254 
00255 END_NAMESPACE // classad
00256 
00257 #endif //__LEXER_H__