TransitionLexer.hxx

Go to the documentation of this file.
00001 #ifndef TRANSITIONLEXER_HXX
00002 #define TRANSITIONLEXER_HXX
00003 
00004 /**************************************************************************
00005  *
00006  * Copyright (C) 2010, Jonathan S. Shapiro
00007  * Portions Copyright (C) 2008, Johns Hopkins University
00008  * All rights reserved.
00009  *
00010  * Redistribution and use in source and binary forms, with or
00011  * without modification, are permitted provided that the following
00012  * conditions are met:
00013  *
00014  *   - Redistributions of source code must contain the above 
00015  *     copyright notice, this list of conditions, and the following
00016  *     disclaimer. 
00017  *
00018  *   - Redistributions in binary form must reproduce the above
00019  *     copyright notice, this list of conditions, and the following
00020  *     disclaimer in the documentation and/or other materials 
00021  *     provided with the distribution.
00022  *
00023  *   - Neither the names of the copyright holders nor the names of any
00024  *     of any contributors may be used to endorse or promote products
00025  *     derived from this software without specific prior written
00026  *     permission. 
00027  *
00028  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00029  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00030  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00031  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00032  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00033  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00034  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00035  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00036  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00037  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00038  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00039  *
00040  **************************************************************************/
00041 
00042 #include <iostream>
00043 
00044 #include "ParseType.hxx"
00045 #include "libsherpa/EnumSet.hxx"
00046 
typedef long ucs4_t;

/// Last-in, first-out store for character values (ucs4_t) that have
/// been handed back so that they can be read again.
class PushBack {
  // Must be able to hold the largest token that might have to be
  // pushed back due to layout (never fewer than four entries). The
  // vector grows on demand; the newest entry sits at the back.
  std::vector<ucs4_t> stack;

public:
  PushBack() {
  }

  /// Hand the value @p c back; it becomes the next value that pop()
  /// returns.
  inline void push(ucs4_t c) {
    stack.push_back(c);
  }

  /// Remove and return the most recently pushed value, or -1 when
  /// nothing is pending.
  inline long pop() {
    if (!stack.empty()) {
      const ucs4_t top = stack.back();
      stack.pop_back();
      return top;
    }

    return -1;
  }
};
00070 
00071 enum LayoutFlagValues {
00075   CHECK_FIRST_TOKEN = 0x1u,
00076 };
00077 typedef sherpa::EnumSet<LayoutFlagValues> LayoutFlags;
00078 
00079 
00080 struct LayoutFrame : public boost::enable_shared_from_this<LayoutFrame> {
00081   bool inserted;              // true IFF left curly was inserted
00082   unsigned column;            // column of first token after '{'
00083   int precedingToken;         // token that preceded this '{'
00084   sherpa::LToken tok;         // copy of opening { token, for location
00085 
00086   boost::shared_ptr<LayoutFrame> next;
00087 
00088   static inline boost::shared_ptr<LayoutFrame>
00089   make(int _precedingToken, bool _inserted, unsigned _column, 
00090        const sherpa::LToken& tok) {
00091     LayoutFrame *lf = new LayoutFrame;
00092 
00093     lf->inserted = _inserted;
00094     lf->precedingToken = _precedingToken;
00095     lf->column = _column;
00096     lf->tok = tok;
00097 
00098     return boost::shared_ptr<LayoutFrame>(lf);
00099   }
00100 };
00101 
00114 struct TransitionLexer {
00115 private:
00119   int lastTokType;
00120 
00122   sherpa::LToken lastToken;
00123 public:
00124   sherpa::LToken getLastToken()
00125   { return lastToken; }
00126 
00127   // Hack to work around crappy Bison/Yacc error handling strategy:
00128   bool showNextError;
00129 
00130   boost::shared_ptr<LayoutFrame> layoutStack;
00131   bool atBeginningOfLine;
00132 
00133   void beginBlock(const sherpa::LToken& tok);
00134   void endBlock(const sherpa::LToken& tok);
00135 
00136 private:
00137   bool closeToOffset(unsigned offset);
00138   bool conditionallyInsertSemicolon(unsigned offset);
00139 
00140 
00141 public:
00142   enum LangFlagsValues {
00143     lf_block = 0x1u,
00144     lf_version = 0x2u
00145   };
00146 
00147   typedef sherpa::EnumSet<LangFlagsValues> LangFlags;
00148 
00149   LangFlags currentLang;
00150 
00152   sherpa::LexLoc here;
00153 
00162   int num_errors;
00167   bool debug;
00175   bool isRuntimeUoc;
00180   bool ifIdentMode;
00189   bool isCommandLineInput;
00191   std::istream& inStream;
00193   std::ostream& errStream;
00194 
00202   unsigned nModules;
00203 
00204 private:
00209   std::string thisToken;
00210 
00211   PushBack pushBackStack;
00212 
00214   ucs4_t getChar();
00216   void ungetChar(ucs4_t);
00217   void ungetThisToken();
00218 
00219   bool valid_ascii_symbol(ucs4_t ucs4);
00220 
00221   bool valid_operator_start(ucs4_t ucs4);
00222   bool valid_operator_continue(ucs4_t ucs4);
00223 
00224   bool valid_ident_start(ucs4_t ucs4);
00225   bool valid_ident_continue(ucs4_t ucs4);
00226   bool valid_ifident_start(ucs4_t ucs4);
00227   bool valid_ifident_continue(ucs4_t ucs4);
00228   bool valid_tv_ident_start(ucs4_t ucs4);
00229   bool valid_tv_ident_continue(ucs4_t ucs4);
00230 public:
00231 
00239   TransitionLexer(std::ostream& errStream, std::istream& inStream, 
00240              const std::string& origin,
00241              bool commandLineInput);
00242 
00245   void ReportParseError(const sherpa::LexLoc& loc, std::string  msg);
00248   void ReportParseWarning(const sherpa::LexLoc& loc, std::string msg);
00249 
00251   void ReportParseError();
00253   void ReportParseError(std::string msg)
00254   {
00255     ReportParseError(lastToken.loc, msg);
00256   }
00258   inline void ReportParseWarning(std::string msg)
00259   {
00260     ReportParseWarning(lastToken.loc, msg);
00261   }
00262 
00264   inline void setDebug(bool showlex)
00265   {
00266     debug = (showlex ? true : false);
00267   }
00268 
00271   inline void setIfIdentMode(bool arg)
00272   {
00273     ifIdentMode = arg;
00274   }
00275 
00277   int lex(ParseType *yylvalp);
00278 
00280   ~TransitionLexer() {}
00281 
00286   struct KeyWord {
00287     const char *nm;
00291     LangFlags whichLang;
00292     int tokValue;
00293 
00294     KeyWord(const char *_nm, LangFlags _whichLang, int _tokValue);
00295   };
00296 
00297 private:
00299   static KeyWord keywords[];
00300 
00308   int kwCheck(const char *s, int identType);
00309 
00316   sherpa::LexLoc skipWhiteSpaceAndComments();
00317 
00324   sherpa::LToken getNextToken();
00325 
00326   void showToken(std::ostream& errStream, const sherpa::LToken& tok);
00327 
00328   std::vector<sherpa::LToken> pushbackTokens;
00329   inline bool havePushbackToken()
00330   {
00331     return (pushbackTokens.size() != 0);
00332   }
00333 
00334 public:
00339   void pushTokenBack(const sherpa::LToken& tok, bool verbose = false);
00340 
00341 private:
00342   sherpa::LToken popToken();
00343 
00349   sherpa::LToken getNextInputToken();
00350 };
00351 
00352 #endif /* TRANSITIONLEXER_HXX */
00353 

Generated on Thu May 17 23:59:16 2012 for BitC Compiler by  doxygen 1.4.7