00001 #ifndef TRANSITIONLEXER_HXX
00002 #define TRANSITIONLEXER_HXX
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042 #include <iostream>
00043
00044 #include "ParseType.hxx"
00045 #include "libsherpa/EnumSet.hxx"
00046
/// Character type handled by the lexer. The name indicates UCS-4 code
/// points; the value -1 is used by PushBack::pop() as an "empty" marker,
/// which is why a signed type is used.
typedef long ucs4_t;

/// LIFO stack of characters that have been "un-read".
///
/// Characters are returned by pop() in the reverse of the order in which
/// they were handed to push().
class PushBack {
  /// Pushed-back characters; the most recently pushed one is at the back.
  std::vector<ucs4_t> stack;

public:
  /// Creates an empty push-back stack.
  PushBack() {
  }

  /// Pushes the character @p c so that the next pop() returns it.
  inline void push(ucs4_t c) {
    stack.push_back(c);
  }

  /// Removes and returns the most recently pushed character.
  ///
  /// @return the character, or -1 if nothing is currently pushed back.
  inline ucs4_t pop() {
    if (stack.empty())
      return -1;

    const ucs4_t c = stack.back();
    stack.pop_back();
    return c;
  }
};
00070
/// Bit values for layout-processing flags. Each enumerator is a distinct
/// bit so that values can be combined in a flag set.
enum LayoutFlagValues {
  /// NOTE(review): presumably requests that the first token be checked
  /// when applying layout rules -- confirm against the lexer implementation.
  CHECK_FIRST_TOKEN = 0x1u
};
00077 typedef sherpa::EnumSet<LayoutFlagValues> LayoutFlags;
00078
00079
00080 struct LayoutFrame : public boost::enable_shared_from_this<LayoutFrame> {
00081 bool inserted;
00082 unsigned column;
00083 int precedingToken;
00084 sherpa::LToken tok;
00085
00086 boost::shared_ptr<LayoutFrame> next;
00087
00088 static inline boost::shared_ptr<LayoutFrame>
00089 make(int _precedingToken, bool _inserted, unsigned _column,
00090 const sherpa::LToken& tok) {
00091 LayoutFrame *lf = new LayoutFrame;
00092
00093 lf->inserted = _inserted;
00094 lf->precedingToken = _precedingToken;
00095 lf->column = _column;
00096 lf->tok = tok;
00097
00098 return boost::shared_ptr<LayoutFrame>(lf);
00099 }
00100 };
00101
00114 struct TransitionLexer {
00115 private:
00119 int lastTokType;
00120
00122 sherpa::LToken lastToken;
00123 public:
00124 sherpa::LToken getLastToken()
00125 { return lastToken; }
00126
00127
00128 bool showNextError;
00129
00130 boost::shared_ptr<LayoutFrame> layoutStack;
00131 bool atBeginningOfLine;
00132
00133 void beginBlock(const sherpa::LToken& tok);
00134 void endBlock(const sherpa::LToken& tok);
00135
00136 private:
00137 bool closeToOffset(unsigned offset);
00138 bool conditionallyInsertSemicolon(unsigned offset);
00139
00140
00141 public:
00142 enum LangFlagsValues {
00143 lf_block = 0x1u,
00144 lf_version = 0x2u
00145 };
00146
00147 typedef sherpa::EnumSet<LangFlagsValues> LangFlags;
00148
00149 LangFlags currentLang;
00150
00152 sherpa::LexLoc here;
00153
00162 int num_errors;
00167 bool debug;
00175 bool isRuntimeUoc;
00180 bool ifIdentMode;
00189 bool isCommandLineInput;
00191 std::istream& inStream;
00193 std::ostream& errStream;
00194
00202 unsigned nModules;
00203
00204 private:
00209 std::string thisToken;
00210
00211 PushBack pushBackStack;
00212
00214 ucs4_t getChar();
00216 void ungetChar(ucs4_t);
00217 void ungetThisToken();
00218
00219 bool valid_ascii_symbol(ucs4_t ucs4);
00220
00221 bool valid_operator_start(ucs4_t ucs4);
00222 bool valid_operator_continue(ucs4_t ucs4);
00223
00224 bool valid_ident_start(ucs4_t ucs4);
00225 bool valid_ident_continue(ucs4_t ucs4);
00226 bool valid_ifident_start(ucs4_t ucs4);
00227 bool valid_ifident_continue(ucs4_t ucs4);
00228 bool valid_tv_ident_start(ucs4_t ucs4);
00229 bool valid_tv_ident_continue(ucs4_t ucs4);
00230 public:
00231
00239 TransitionLexer(std::ostream& errStream, std::istream& inStream,
00240 const std::string& origin,
00241 bool commandLineInput);
00242
00245 void ReportParseError(const sherpa::LexLoc& loc, std::string msg);
00248 void ReportParseWarning(const sherpa::LexLoc& loc, std::string msg);
00249
00251 void ReportParseError();
00253 void ReportParseError(std::string msg)
00254 {
00255 ReportParseError(lastToken.loc, msg);
00256 }
00258 inline void ReportParseWarning(std::string msg)
00259 {
00260 ReportParseWarning(lastToken.loc, msg);
00261 }
00262
00264 inline void setDebug(bool showlex)
00265 {
00266 debug = (showlex ? true : false);
00267 }
00268
00271 inline void setIfIdentMode(bool arg)
00272 {
00273 ifIdentMode = arg;
00274 }
00275
00277 int lex(ParseType *yylvalp);
00278
00280 ~TransitionLexer() {}
00281
00286 struct KeyWord {
00287 const char *nm;
00291 LangFlags whichLang;
00292 int tokValue;
00293
00294 KeyWord(const char *_nm, LangFlags _whichLang, int _tokValue);
00295 };
00296
00297 private:
00299 static KeyWord keywords[];
00300
00308 int kwCheck(const char *s, int identType);
00309
00316 sherpa::LexLoc skipWhiteSpaceAndComments();
00317
00324 sherpa::LToken getNextToken();
00325
00326 void showToken(std::ostream& errStream, const sherpa::LToken& tok);
00327
00328 std::vector<sherpa::LToken> pushbackTokens;
00329 inline bool havePushbackToken()
00330 {
00331 return (pushbackTokens.size() != 0);
00332 }
00333
00334 public:
00339 void pushTokenBack(const sherpa::LToken& tok, bool verbose = false);
00340
00341 private:
00342 sherpa::LToken popToken();
00343
00349 sherpa::LToken getNextInputToken();
00350 };
00351
00352 #endif
00353