2010-06-27 17:20:21 +00:00
|
|
|
#ifndef COMPILER_SCANNER_H_INCLUDED
|
|
|
|
#define COMPILER_SCANNER_H_INCLUDED
|
|
|
|
|
2019-10-29 07:05:18 +00:00
|
|
|
#include <algorithm>
#include <cctype>
#include <iosfwd>
#include <istream>
#include <iterator>
#include <string>
#include <vector>

#include "tokenloc.hpp"
|
|
|
|
|
|
|
|
namespace Compiler
|
|
|
|
{
|
|
|
|
class ErrorHandler;
|
|
|
|
class Parser;
|
2010-07-03 07:54:01 +00:00
|
|
|
class Extensions;
|
2010-06-27 17:20:21 +00:00
|
|
|
|
|
|
|
/// \class Compiler::Scanner
/// \brief Scanner
///
/// This class translates a char-stream to a token stream (delivered via
/// parser-callbacks).
|
|
|
|
|
2019-10-29 07:05:18 +00:00
|
|
|
/// \brief A single UTF-8 encoded character (one to four bytes).
///
/// The bytes are stored in mData, padded with zeros. mLength is the index of
/// the last used byte (0 for a one-byte character, up to 3), or -1 while the
/// object is blank.
class MultiChar
{
public:
    /// Creates a blank character (all bytes zero, length -1).
    MultiChar() { blank(); }

    /// Creates a character whose first byte is \a ch.
    ///
    /// \note Only the first byte is stored, while the length is derived from
    /// \a ch as if it were a UTF-8 lead byte (so it is -1 when \a ch cannot
    /// start a sequence).
    explicit MultiChar(const char ch)
    {
        blank();
        mData[0] = ch;
        mLength = getCharLength(ch);
    }

    /// \return the number of bytes that follow the lead byte \a ch (0 to 3),
    /// or -1 if \a ch cannot start a UTF-8 sequence.
    static int getCharLength(const char ch)
    {
        const unsigned char lead = static_cast<unsigned char>(ch);

        if (lead <= 127)
            return 0; // plain ASCII
        if ((lead & 0xE0) == 0xC0)
            return 1; // 110xxxxx
        if ((lead & 0xF0) == 0xE0)
            return 2; // 1110xxxx
        if ((lead & 0xF8) == 0xF0)
            return 3; // 11110xxx

        return -1; // continuation byte or invalid lead byte
    }

    /// True if this is exactly the one-byte character \a ch.
    bool operator==(const char ch) const { return mData[0] == ch && isSingleByte(); }

    /// True if all four stored bytes are equal (the length is not compared).
    bool operator==(const MultiChar& ch) const
    {
        return mData[0] == ch.mData[0] && mData[1] == ch.mData[1] && mData[2] == ch.mData[2]
            && mData[3] == ch.mData[3];
    }

    /// True unless this is exactly the one-byte character \a ch.
    bool operator!=(const char ch) const { return !(*this == ch); }

    /// True if any of the four stored bytes differ.
    bool operator!=(const MultiChar& ch) const { return !(*this == ch); }

    /// True for a space, a tab or a comma (the comma is classified as whitespace here).
    bool isWhitespace() const
    {
        return (mData[0] == ' ' || mData[0] == '\t' || mData[0] == ',') && isSingleByte();
    }

    /// True for a one-byte character accepted by std::isdigit.
    bool isDigit() const
    {
        return std::isdigit(static_cast<unsigned char>(mData[0])) != 0 && isSingleByte();
    }

    /// True for an ASCII '-' or for the three-byte sequence E2 80 93 (U+2013, en dash).
    bool isMinusSign() const
    {
        if (mData[0] == '-' && isSingleByte())
            return true;

        return mData[0] == '\xe2' && mData[1] == '\x80' && mData[2] == '\x93' && mData[3] == 0;
    }

    /// True for a character accepted by std::isalpha and for every multi-byte
    /// character except the minus signs recognised by isMinusSign().
    bool isAlpha() const
    {
        if (isMinusSign())
            return false;

        return std::isalpha(static_cast<unsigned char>(mData[0])) != 0 || !isSingleByte();
    }

    /// Appends the bytes of this character to \a str (nothing if blank).
    void appendTo(std::string& str) const
    {
        for (int i = 0; i <= mLength; ++i)
            str += mData[i];
    }

    /// Puts the bytes of this character back into \a in, last byte first, so
    /// that they can be read again in the original order.
    void putback(std::istream& in) const
    {
        for (int i = mLength; i >= 0; --i)
            in.putback(mData[i]);
    }

    /// Reads and consumes the next character from \a in.
    ///
    /// \return false if the stream is not good, the next byte cannot start a
    /// UTF-8 sequence, or the stream ends inside a sequence. The object is
    /// blank after any failure.
    bool getFrom(std::istream& in)
    {
        blank();

        char ch = static_cast<char>(in.peek());

        if (!in.good())
            return false;

        const int length = getCharLength(ch);
        if (length < 0)
            return false;

        for (int i = 0; i <= length; ++i)
        {
            in.get(ch);

            if (!in.good())
            {
                // Truncated sequence: do not keep a partial character around.
                blank();
                return false;
            }

            mData[i] = ch;
        }

        mLength = length;

        return true;
    }

    /// Reads the next character from \a in without consuming it; the read
    /// position is restored with seekg() on success.
    ///
    /// Any previous content is discarded first, so an object that held a
    /// longer character can be reused safely.
    ///
    /// \return false under the same conditions as getFrom(); the object is
    /// blank in that case and the stream is left as the failed read left it.
    bool peek(std::istream& in)
    {
        const std::streampos start = in.tellg();

        if (!getFrom(in))
            return false;

        in.seekg(start);
        return true;
    }

    /// Resets the object to the blank state: all bytes zero, length -1.
    void blank()
    {
        std::fill(std::begin(mData), std::end(mData), '\0');
        mLength = -1;
    }

    /// \return the bytes of this character as a string (empty if blank).
    std::string data() const
    {
        // NB: mLength is the number of the last element in the array
        return std::string(mData, mLength + 1);
    }

private:
    /// True if bytes 1..3 are all zero, i.e. at most the first byte is in use.
    bool isSingleByte() const { return mData[1] == 0 && mData[2] == 0 && mData[3] == 0; }

    char mData[4]{};
    int mLength{};
};
|
|
|
|
|
2010-06-27 17:20:21 +00:00
|
|
|
class Scanner
|
|
|
|
{
|
2010-06-30 09:03:08 +00:00
|
|
|
enum putback_type
|
|
|
|
{
|
2010-07-01 09:07:21 +00:00
|
|
|
Putback_None,
|
2010-06-30 09:03:08 +00:00
|
|
|
Putback_Special,
|
2010-07-01 09:07:21 +00:00
|
|
|
Putback_Integer,
|
2010-06-27 17:20:21 +00:00
|
|
|
Putback_Float,
|
2010-06-30 10:04:26 +00:00
|
|
|
Putback_Name,
|
2021-09-19 17:53:38 +00:00
|
|
|
Putback_Keyword
|
2010-06-27 17:20:21 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
ErrorHandler& mErrorHandler;
|
|
|
|
TokenLoc mLoc;
|
|
|
|
TokenLoc mPrevLoc;
|
|
|
|
std::istream& mStream;
|
|
|
|
const Extensions* mExtensions;
|
|
|
|
putback_type mPutback;
|
2010-06-30 09:03:08 +00:00
|
|
|
int mPutbackCode;
|
|
|
|
int mPutbackInteger;
|
2010-06-27 17:20:21 +00:00
|
|
|
float mPutbackFloat;
|
|
|
|
std::string mPutbackName;
|
2010-06-29 09:24:07 +00:00
|
|
|
TokenLoc mPutbackLoc;
|
2015-12-03 10:14:58 +00:00
|
|
|
bool mStrictKeywords;
|
2018-01-12 15:37:06 +00:00
|
|
|
bool mTolerantNames;
|
2018-08-18 12:12:01 +00:00
|
|
|
bool mIgnoreNewline;
|
2021-06-19 09:21:37 +00:00
|
|
|
bool mExpectName;
|
2022-12-27 13:59:56 +00:00
|
|
|
bool mIgnoreSpecial;
|
2010-06-27 17:20:21 +00:00
|
|
|
|
2022-09-22 18:26:05 +00:00
|
|
|
public:
|
2010-06-27 17:20:21 +00:00
|
|
|
enum keyword
|
2022-09-22 18:26:05 +00:00
|
|
|
{
|
|
|
|
K_begin,
|
|
|
|
K_end,
|
|
|
|
K_short,
|
|
|
|
K_long,
|
|
|
|
K_float,
|
|
|
|
K_if,
|
|
|
|
K_endif,
|
|
|
|
K_else,
|
2010-06-27 17:20:21 +00:00
|
|
|
K_elseif,
|
|
|
|
K_while,
|
|
|
|
K_endwhile,
|
|
|
|
K_return,
|
|
|
|
K_messagebox,
|
2022-09-22 18:26:05 +00:00
|
|
|
K_set,
|
|
|
|
K_to
|
|
|
|
};
|
2010-06-27 17:20:21 +00:00
|
|
|
|
|
|
|
enum special
|
2022-09-22 18:26:05 +00:00
|
|
|
{
|
2010-06-27 17:20:21 +00:00
|
|
|
S_newline,
|
|
|
|
S_open,
|
2022-09-22 18:26:05 +00:00
|
|
|
S_close,
|
|
|
|
S_cmpEQ,
|
2010-06-27 17:20:21 +00:00
|
|
|
S_cmpNE,
|
2022-09-22 18:26:05 +00:00
|
|
|
S_cmpLT,
|
|
|
|
S_cmpLE,
|
|
|
|
S_cmpGT,
|
|
|
|
S_cmpGE,
|
|
|
|
S_plus,
|
|
|
|
S_minus,
|
|
|
|
S_mult,
|
|
|
|
S_div,
|
|
|
|
S_ref,
|
2010-06-27 17:20:21 +00:00
|
|
|
S_member
|
|
|
|
};
|
|
|
|
|
2022-09-22 18:26:05 +00:00
|
|
|
private:
|
2010-06-27 17:20:21 +00:00
|
|
|
// not implemented
|
2022-09-22 18:26:05 +00:00
|
|
|
|
2010-06-27 17:20:21 +00:00
|
|
|
Scanner(const Scanner&);
|
2019-10-29 07:05:18 +00:00
|
|
|
Scanner& operator=(const Scanner&);
|
2010-06-27 17:20:21 +00:00
|
|
|
|
2019-10-29 07:05:18 +00:00
|
|
|
bool get(MultiChar& c);
|
2010-06-27 17:20:21 +00:00
|
|
|
|
|
|
|
void putback(MultiChar& c);
|
|
|
|
|
2019-10-29 07:05:18 +00:00
|
|
|
bool scanToken(Parser& parser);
|
2010-06-27 17:20:21 +00:00
|
|
|
|
|
|
|
bool scanInt(MultiChar& c, Parser& parser, bool& cont);
|
|
|
|
|
2021-09-16 16:06:46 +00:00
|
|
|
bool scanFloat(const std::string& intValue, Parser& parser, bool& cont);
|
2010-06-27 17:20:21 +00:00
|
|
|
|
2015-01-03 12:59:59 +00:00
|
|
|
bool scanName(MultiChar& c, Parser& parser, bool& cont, std::string name = {});
|
2010-06-27 17:20:21 +00:00
|
|
|
|
2019-10-29 07:05:18 +00:00
|
|
|
/// \param name May contain the start of the name (one or more characters)
|
|
|
|
bool scanName(std::string& name);
|
2014-07-15 08:39:11 +00:00
|
|
|
|
2019-10-29 07:05:18 +00:00
|
|
|
bool scanSpecial(MultiChar& c, Parser& parser, bool& cont);
|
2010-06-27 17:20:21 +00:00
|
|
|
|
|
|
|
bool isStringCharacter(MultiChar& c, bool lookAhead = true);
|
|
|
|
|
2022-09-22 18:26:05 +00:00
|
|
|
public:
|
2010-07-03 07:54:01 +00:00
|
|
|
Scanner(ErrorHandler& errorHandler, std::istream& inputStream, const Extensions* extensions = nullptr);
|
2011-01-05 21:18:21 +00:00
|
|
|
///< constructor
|
2010-06-27 17:20:21 +00:00
|
|
|
|
|
|
|
void scan(Parser& parser);
|
|
|
|
///< Scan a token and deliver it to the parser.
|
2010-06-30 09:03:08 +00:00
|
|
|
|
2011-01-12 17:24:00 +00:00
|
|
|
void putbackSpecial(int code, const TokenLoc& loc);
|
2010-06-30 09:03:08 +00:00
|
|
|
///< put back a special token
|
2010-07-01 09:07:21 +00:00
|
|
|
|
2011-01-12 17:24:00 +00:00
|
|
|
void putbackInt(int value, const TokenLoc& loc);
|
|
|
|
///< put back an integer token
|
2010-07-01 09:07:21 +00:00
|
|
|
|
2011-01-12 17:24:00 +00:00
|
|
|
void putbackFloat(float value, const TokenLoc& loc);
|
|
|
|
///< put back a float token
|
2010-07-01 09:07:21 +00:00
|
|
|
|
2011-01-12 17:24:00 +00:00
|
|
|
void putbackName(const std::string& name, const TokenLoc& loc);
|
2015-12-03 10:14:58 +00:00
|
|
|
///< put back a name token
|
2010-07-01 09:07:21 +00:00
|
|
|
|
2011-01-12 17:24:00 +00:00
|
|
|
void putbackKeyword(int keyword, const TokenLoc& loc);
|
2010-07-01 09:07:21 +00:00
|
|
|
///< put back a keyword token
|
2011-01-12 17:24:00 +00:00
|
|
|
|
|
|
|
void listKeywords(std::vector<std::string>& keywords);
|
2015-12-03 10:14:58 +00:00
|
|
|
///< Append all known keywords to \a keywords.
|
|
|
|
|
2018-08-18 12:12:01 +00:00
|
|
|
/// Treat newline character as a part of script command.
|
|
|
|
///
|
|
|
|
/// \attention This mode lasts only until the next keyword is reached.
|
|
|
|
void enableIgnoreNewlines();
|
|
|
|
|
2015-12-03 10:14:58 +00:00
|
|
|
/// Do not accept keywords in quotation marks anymore.
|
|
|
|
///
|
|
|
|
/// \attention This mode lasts only until the next newline is reached.
|
|
|
|
void enableStrictKeywords();
|
2018-01-12 15:37:06 +00:00
|
|
|
|
|
|
|
/// Continue parsing a name when hitting a '.' or a '-'
|
|
|
|
///
|
|
|
|
/// \attention This mode lasts only until the next newline is reached.
|
|
|
|
void enableTolerantNames();
|
2021-06-19 09:21:37 +00:00
|
|
|
|
|
|
|
/// Treat '.' and '-' as the start of a name.
|
|
|
|
///
|
|
|
|
/// \attention This mode lasts only until the next newline is reached or the call to scan ends.
|
|
|
|
void enableExpectName();
|
2010-06-27 17:20:21 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|