1
0
Fork 0
mirror of https://github.com/OpenMW/openmw.git synced 2025-01-22 04:53:52 +00:00
openmw/components/compiler/scanner.hpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

323 lines
7.9 KiB
C++
Raw Normal View History

2010-06-27 17:20:21 +00:00
#ifndef COMPILER_SCANNER_H_INCLUDED
#define COMPILER_SCANNER_H_INCLUDED
#include <cctype>
2010-06-27 17:20:21 +00:00
#include <iosfwd>
#include <istream>
2010-06-27 17:20:21 +00:00
#include <string>
2011-01-12 17:24:00 +00:00
#include <vector>
2010-06-27 17:20:21 +00:00
#include "tokenloc.hpp"
namespace Compiler
{
class ErrorHandler;
class Parser;
2010-07-03 07:54:01 +00:00
class Extensions;
2010-06-27 17:20:21 +00:00
/// \brief A single UTF-8 encoded character, stored as one to four bytes.
///
/// Used by the Scanner, the class that translates a char-stream into a token
/// stream (delivered via parser-callbacks).
///
/// Bytes of the internal buffer that are not part of the current character
/// are kept at zero; the comparison and classification functions depend on
/// that.
class MultiChar
{
public:
    /// Create a blank character (no bytes).
    MultiChar() { blank(); }

    /// Create a character from a single byte.
    ///
    /// \note Only \a ch itself is stored, while the recorded length is whatever
    /// getCharLength() reports for it.
    explicit MultiChar(const char ch)
    {
        blank();
        mData[0] = ch;
        mLength = getCharLength(ch);
    }

    /// Number of bytes that follow the lead byte \a ch in a UTF-8 sequence.
    ///
    /// \return 0 for a 7-bit character, 1 to 3 for the lead byte of a
    /// multi-byte sequence and -1 if \a ch can not start a sequence.
    static int getCharLength(const char ch)
    {
        const unsigned char lead = ch;
        if (lead <= 127)
            return 0;
        if ((lead & 0xE0) == 0xC0)
            return 1;
        if ((lead & 0xF0) == 0xE0)
            return 2;
        if ((lead & 0xF8) == 0xF0)
            return 3;
        return -1;
    }

    /// True if this is exactly the single-byte character \a ch.
    bool operator==(const char ch) const { return mData[0] == ch && isSingleByte(); }

    /// True if all four bytes are equal to those of \a ch.
    bool operator==(const MultiChar& ch) const
    {
        return mData[0] == ch.mData[0] && mData[1] == ch.mData[1] && mData[2] == ch.mData[2]
            && mData[3] == ch.mData[3];
    }

    /// True unless this is exactly the single-byte character \a ch.
    bool operator!=(const char ch) const { return !(*this == ch); }

    /// True for a space, a tab or a comma.
    bool isWhitespace() const
    {
        return (mData[0] == ' ' || mData[0] == '\t' || mData[0] == ',') && isSingleByte();
    }

    /// True for a single-byte character accepted by std::isdigit.
    bool isDigit() const { return std::isdigit(static_cast<unsigned char>(mData[0])) && isSingleByte(); }

    /// True for an ASCII '-' or the three-byte sequence E2 80 93 (U+2013, en dash).
    bool isMinusSign() const
    {
        if (mData[0] == '-' && isSingleByte())
            return true;

        return mData[0] == '\xe2' && mData[1] == '\x80' && mData[2] == '\x93' && mData[3] == 0;
    }

    /// True for a character accepted by std::isalpha and for every multi-byte
    /// character, with the exception of the minus signs (see isMinusSign()).
    bool isAlpha() const
    {
        if (isMinusSign())
            return false;

        return std::isalpha(static_cast<unsigned char>(mData[0])) || !isSingleByte();
    }

    /// Append the bytes of this character to \a str.
    void appendTo(std::string& str) const
    {
        for (int i = 0; i <= mLength; i++)
            str += mData[i];
    }

    /// Put the bytes of this character back into \a in (last byte first, so
    /// that they are read again in their original order).
    void putback(std::istream& in) const
    {
        for (int i = mLength; i >= 0; i--)
            in.putback(mData[i]);
    }

    /// Extract the next character from \a in.
    ///
    /// \return false if the stream is not readable, the next byte can not
    /// start a UTF-8 sequence or the stream ends within the sequence. This
    /// object is blank in that case.
    bool getFrom(std::istream& in) { return readSequence(in); }

    /// Read the next character from \a in and seek back to where the stream
    /// was before the call.
    ///
    /// \return false under the same conditions as getFrom(). This object is
    /// blank in that case.
    ///
    /// \note The stream position is only restored on success; after a failure
    /// the stream is left in whatever state the failed read produced.
    bool peek(std::istream& in)
    {
        const std::streampos origin = in.tellg();

        if (!readSequence(in))
            return false;

        in.seekg(origin);
        return true;
    }

    /// Reset to the blank state: all bytes zero, no valid character.
    void blank()
    {
        for (char& byte : mData)
            byte = '\0';
        mLength = -1;
    }

    /// The bytes of this character as a string (empty if blank).
    std::string data() const
    {
        // NB: mLength is the number of the last element in the array
        return std::string(mData, mLength + 1);
    }

private:
    /// True if no byte after the first one is set.
    bool isSingleByte() const { return mData[1] == 0 && mData[2] == 0 && mData[3] == 0; }

    /// Shared implementation of getFrom() and peek(): blank this object, then
    /// extract one complete UTF-8 sequence from \a in.
    ///
    /// Starting from a blank state guarantees that no bytes of a previously
    /// held, longer character survive behind a shorter new one.
    bool readSequence(std::istream& in)
    {
        blank();

        const char lead = static_cast<char>(in.peek());
        if (!in.good())
            return false;

        const int length = getCharLength(lead);
        if (length < 0)
            return false;

        for (int i = 0; i <= length; i++)
        {
            char byte = '\0';
            in.get(byte);

            if (!in.good())
            {
                // do not keep a partially read sequence around
                blank();
                return false;
            }

            mData[i] = byte;
        }

        mLength = length;
        return true;
    }

    char mData[4]{};
    int mLength{}; ///< index of the last used byte in mData, -1 if blank
};
2010-06-27 17:20:21 +00:00
class Scanner
{
enum putback_type
{
Putback_None,
Putback_Special,
Putback_Integer,
2010-06-27 17:20:21 +00:00
Putback_Float,
Putback_Name,
Putback_Keyword
2010-06-27 17:20:21 +00:00
};
ErrorHandler& mErrorHandler;
TokenLoc mLoc;
TokenLoc mPrevLoc;
std::istream& mStream;
const Extensions* mExtensions;
putback_type mPutback;
int mPutbackCode;
int mPutbackInteger;
2010-06-27 17:20:21 +00:00
float mPutbackFloat;
std::string mPutbackName;
TokenLoc mPutbackLoc;
bool mStrictKeywords;
bool mTolerantNames;
bool mIgnoreNewline;
bool mExpectName;
bool mIgnoreSpecial;
2010-06-27 17:20:21 +00:00
2022-09-22 18:26:05 +00:00
public:
2010-06-27 17:20:21 +00:00
enum keyword
2022-09-22 18:26:05 +00:00
{
K_begin,
K_end,
K_short,
K_long,
K_float,
K_if,
K_endif,
K_else,
2010-06-27 17:20:21 +00:00
K_elseif,
K_while,
K_endwhile,
K_return,
K_messagebox,
2022-09-22 18:26:05 +00:00
K_set,
K_to
};
2010-06-27 17:20:21 +00:00
enum special
2022-09-22 18:26:05 +00:00
{
2010-06-27 17:20:21 +00:00
S_newline,
S_open,
2022-09-22 18:26:05 +00:00
S_close,
S_cmpEQ,
2010-06-27 17:20:21 +00:00
S_cmpNE,
2022-09-22 18:26:05 +00:00
S_cmpLT,
S_cmpLE,
S_cmpGT,
S_cmpGE,
S_plus,
S_minus,
S_mult,
S_div,
S_ref,
2010-06-27 17:20:21 +00:00
S_member
};
2022-09-22 18:26:05 +00:00
private:
2010-06-27 17:20:21 +00:00
// not implemented
2022-09-22 18:26:05 +00:00
2010-06-27 17:20:21 +00:00
Scanner(const Scanner&);
Scanner& operator=(const Scanner&);
2010-06-27 17:20:21 +00:00
bool get(MultiChar& c);
2010-06-27 17:20:21 +00:00
void putback(MultiChar& c);
bool scanToken(Parser& parser);
2010-06-27 17:20:21 +00:00
bool scanInt(MultiChar& c, Parser& parser, bool& cont);
bool scanFloat(const std::string& intValue, Parser& parser, bool& cont);
2010-06-27 17:20:21 +00:00
2015-01-03 12:59:59 +00:00
bool scanName(MultiChar& c, Parser& parser, bool& cont, std::string name = {});
2010-06-27 17:20:21 +00:00
/// \param name May contain the start of the name (one or more characters)
bool scanName(std::string& name);
bool scanSpecial(MultiChar& c, Parser& parser, bool& cont);
2010-06-27 17:20:21 +00:00
bool isStringCharacter(MultiChar& c, bool lookAhead = true);
2022-09-22 18:26:05 +00:00
public:
2010-07-03 07:54:01 +00:00
Scanner(ErrorHandler& errorHandler, std::istream& inputStream, const Extensions* extensions = nullptr);
2011-01-05 21:18:21 +00:00
///< constructor
2010-06-27 17:20:21 +00:00
void scan(Parser& parser);
///< Scan a token and deliver it to the parser.
2011-01-12 17:24:00 +00:00
void putbackSpecial(int code, const TokenLoc& loc);
///< put back a special token
2011-01-12 17:24:00 +00:00
void putbackInt(int value, const TokenLoc& loc);
///< put back an integer token
2011-01-12 17:24:00 +00:00
void putbackFloat(float value, const TokenLoc& loc);
///< put back a float token
2011-01-12 17:24:00 +00:00
void putbackName(const std::string& name, const TokenLoc& loc);
///< put back a name token
2011-01-12 17:24:00 +00:00
void putbackKeyword(int keyword, const TokenLoc& loc);
///< put back a keyword token
2011-01-12 17:24:00 +00:00
void listKeywords(std::vector<std::string>& keywords);
///< Append all known keywords to \a keywords.
/// Treat newline character as a part of script command.
///
/// \attention This mode lasts only until the next keyword is reached.
void enableIgnoreNewlines();
/// Do not accept keywords in quotation marks anymore.
///
/// \attention This mode lasts only until the next newline is reached.
void enableStrictKeywords();
/// Continue parsing a name when hitting a '.' or a '-'
///
/// \attention This mode lasts only until the next newline is reached.
void enableTolerantNames();
/// Treat '.' and '-' as the start of a name.
///
/// \attention This mode lasts only until the next newline is reached or the call to scan ends.
void enableExpectName();
2010-06-27 17:20:21 +00:00
};
}
#endif