#ifndef COMPILER_SCANNER_H_INCLUDED #define COMPILER_SCANNER_H_INCLUDED #include #include #include #include #include #include "tokenloc.hpp" namespace Compiler { class ErrorHandler; class Parser; class Extensions; /// \brief Scanner /// /// This class translate a char-stream to a token stream (delivered via /// parser-callbacks). class MultiChar { public: MultiChar() { blank(); } explicit MultiChar(const char ch) { blank(); mData[0] = ch; mLength = getCharLength(ch); } static int getCharLength(const char ch) { unsigned char c = ch; if (c <= 127) return 0; else if ((c & 0xE0) == 0xC0) return 1; else if ((c & 0xF0) == 0xE0) return 2; else if ((c & 0xF8) == 0xF0) return 3; else return -1; } bool operator==(const char ch) const { return mData[0] == ch && mData[1] == 0 && mData[2] == 0 && mData[3] == 0; } bool operator==(const MultiChar& ch) const { return mData[0] == ch.mData[0] && mData[1] == ch.mData[1] && mData[2] == ch.mData[2] && mData[3] == ch.mData[3]; } bool operator!=(const char ch) const { return mData[0] != ch || mData[1] != 0 || mData[2] != 0 || mData[3] != 0; } bool isWhitespace() const { return (mData[0] == ' ' || mData[0] == '\t' || mData[0] == ',') && mData[1] == 0 && mData[2] == 0 && mData[3] == 0; } bool isDigit() const { return std::isdigit(static_cast(mData[0])) && mData[1] == 0 && mData[2] == 0 && mData[3] == 0; } bool isMinusSign() const { if (mData[0] == '-' && mData[1] == 0 && mData[2] == 0 && mData[3] == 0) return true; return mData[0] == '\xe2' && mData[1] == '\x80' && mData[2] == '\x93' && mData[3] == 0; } bool isAlpha() const { if (isMinusSign()) return false; return std::isalpha(static_cast(mData[0])) || mData[1] != 0 || mData[2] != 0 || mData[3] != 0; } void appendTo(std::string& str) const { for (int i = 0; i <= mLength; i++) str += mData[i]; } void putback(std::istream& in) const { for (int i = mLength; i >= 0; i--) in.putback(mData[i]); } bool getFrom(std::istream& in) { blank(); char ch = static_cast(in.peek()); if (!in.good()) return false; int length = getCharLength(ch); if (length < 0) return false; for (int i = 0; i <= length; i++) { in.get(ch); if (!in.good()) return false; mData[i] = ch; } mLength = length; return true; } bool peek(std::istream& in) { std::streampos p_orig = in.tellg(); char ch = static_cast(in.peek()); if (!in.good()) return false; int length = getCharLength(ch); if (length < 0) return false; for (int i = 0; i <= length; i++) { in.get(ch); if (!in.good()) return false; mData[i] = ch; } mLength = length; in.seekg(p_orig); return true; } void blank() { std::fill(std::begin(mData), std::end(mData), '\0'); mLength = -1; } std::string data() const { // NB: mLength is the number of the last element in the array return std::string(mData, mLength + 1); } private: char mData[4]{}; int mLength{}; }; class Scanner { enum putback_type { Putback_None, Putback_Special, Putback_Integer, Putback_Float, Putback_Name, Putback_Keyword }; ErrorHandler& mErrorHandler; TokenLoc mLoc; TokenLoc mPrevLoc; std::istream& mStream; const Extensions* mExtensions; putback_type mPutback; int mPutbackCode; int mPutbackInteger; float mPutbackFloat; std::string mPutbackName; TokenLoc mPutbackLoc; bool mStrictKeywords; bool mTolerantNames; bool mIgnoreNewline; bool mExpectName; bool mIgnoreSpecial; public: enum keyword { K_begin, K_end, K_short, K_long, K_float, K_if, K_endif, K_else, K_elseif, K_while, K_endwhile, K_return, K_messagebox, K_set, K_to }; enum special { S_newline, S_open, S_close, S_cmpEQ, S_cmpNE, S_cmpLT, S_cmpLE, S_cmpGT, S_cmpGE, S_plus, S_minus, S_mult, S_div, S_ref, S_member }; private: // not implemented Scanner(const Scanner&); Scanner& operator=(const Scanner&); bool get(MultiChar& c); void putback(MultiChar& c); bool scanToken(Parser& parser); bool scanInt(MultiChar& c, Parser& parser, bool& cont); bool scanFloat(const std::string& intValue, Parser& parser, bool& cont); bool scanName(MultiChar& c, Parser& parser, bool& cont, std::string name = {}); /// \param name May contain the start of the name (one or more characters) bool scanName(std::string& name); bool scanSpecial(MultiChar& c, Parser& parser, bool& cont); bool isStringCharacter(MultiChar& c, bool lookAhead = true); public: Scanner(ErrorHandler& errorHandler, std::istream& inputStream, const Extensions* extensions = nullptr); ///< constructor void scan(Parser& parser); ///< Scan a token and deliver it to the parser. void putbackSpecial(int code, const TokenLoc& loc); ///< put back a special token void putbackInt(int value, const TokenLoc& loc); ///< put back an integer token void putbackFloat(float value, const TokenLoc& loc); ///< put back a float token void putbackName(const std::string& name, const TokenLoc& loc); ///< put back a name token void putbackKeyword(int keyword, const TokenLoc& loc); ///< put back a keyword token void listKeywords(std::vector& keywords); ///< Append all known keywords to \a keywords. /// Treat newline character as a part of script command. /// /// \attention This mode lasts only until the next keyword is reached. void enableIgnoreNewlines(); /// Do not accept keywords in quotation marks anymore. /// /// \attention This mode lasts only until the next newline is reached. void enableStrictKeywords(); /// Continue parsing a name when hitting a '.' or a '-' /// /// \attention This mode lasts only until the next newline is reached. void enableTolerantNames(); /// Treat '.' and '-' as the start of a name. /// /// \attention This mode lasts only until the next newline is reached or the call to scan ends. void enableExpectName(); }; } #endif