added tokenizing of MW scripts

pull/7/head
Marc Zinnschlag 15 years ago
parent 31e22186ef
commit ddcbc8cd62

@ -66,9 +66,16 @@ set(MISC_HEADER components/misc/fileops.hpp components/misc/slice_array.hpp
components/misc/stringops.hpp) components/misc/stringops.hpp)
source_group(misc FILES ${MISC} ${MISC_HEADER}) source_group(misc FILES ${MISC} ${MISC_HEADER})
set(COMPONENTS ${BSA} ${NIF} ${NIFOGRE} ${ESM_STORE} ${OGRE} ${INPUT} ${MISC}) set(COMPILER components/compiler/errorhandler.cpp
components/compiler/fileparser.cpp
components/compiler/parser.cpp components/compiler/scanner.cpp
components/compiler/streamerrorhandler.cpp)
file(GLOB COMPILER_HEADER components/compiler/*.hpp)
source_group(compiler FILES ${COMPILER} ${COMPILER_HEADER})
set(COMPONENTS ${BSA} ${NIF} ${NIFOGRE} ${ESM_STORE} ${OGRE} ${INPUT} ${MISC} ${COMPILER})
set(COMPONENTS_HEADER ${BSA_HEADER} ${NIF_HEADER} ${NIFOGRE_HEADER} ${ESM_STORE_HEADER} set(COMPONENTS_HEADER ${BSA_HEADER} ${NIF_HEADER} ${NIFOGRE_HEADER} ${ESM_STORE_HEADER}
${ESM_HEADER} ${OGRE_HEADER} ${INPUT_HEADER} ${MISC_HEADER}) ${ESM_HEADER} ${OGRE_HEADER} ${INPUT_HEADER} ${MISC_HEADER} ${COMPILER_HEADER})
# source directory: libs # source directory: libs
@ -163,7 +170,7 @@ endif (APPLE)
option(BUILD_MWCOMPILER "build standalone Morrowind script compiler" ON) option(BUILD_MWCOMPILER "build standalone Morrowind script compiler" ON)
if (BUILD_MWCOMPILER) if (BUILD_MWCOMPILER)
set(TOOLS_MWCOMPILER apps/mwcompiler/main.cpp) set(TOOLS_MWCOMPILER ${COMPILER} apps/mwcompiler/main.cpp)
add_executable(mwcompiler ${TOOLS_MWCOMPILER}) add_executable(mwcompiler ${TOOLS_MWCOMPILER})
endif() endif()

@ -1,8 +1,31 @@
// Stand-alone MW-script compiler // Stand-alone MW-script compiler
#include <exception>
#include <iostream>
#include <fstream>
#include <components/compiler/streamerrorhandler.hpp>
#include <components/compiler/scanner.hpp>
#include <components/compiler/fileparser.hpp>
#include <components/compiler/context.hpp>
int main (int argc, char **argv) int main (int argc, char **argv)
{ {
try
{
Compiler::Context context;
Compiler::StreamErrorHandler errorHandler (std::cout);
Compiler::FileParser parser (errorHandler, context);
std::ifstream file ("test.mwscript");
Compiler::Scanner scanner (errorHandler, file);
scanner.scan (parser);
}
catch (const std::exception &e)
{
std::cout << "\nERROR: " << e.what() << std::endl;
return 1;
}
} }

@ -0,0 +1,13 @@
#ifndef COMPILER_CONTEXT_H_INCLUDED
#define COMPILER_CONTEXT_H_INCLUDED
namespace Compiler
{
class Context
{
};
}
#endif

@ -0,0 +1,65 @@
#include "errorhandler.hpp"
namespace Compiler
{
// constructor
ErrorHandler::ErrorHandler() : mWarnings (0), mErrors (0) {}
// destructor
ErrorHandler::~ErrorHandler() {}
// Was compiling successful?
bool ErrorHandler::isGood() const
{
return mErrors==0;
}
// Return number of errors
int ErrorHandler::countErrors() const
{
return mErrors;
}
// Return number of warnings
int ErrorHandler::countWarnings() const
{
return mWarnings;
}
// Generate a warning message.
void ErrorHandler::warning (const std::string& message, const TokenLoc& loc)
{
++mWarnings;
report (message, loc, WarningMessage);
}
// Generate an error message.
void ErrorHandler::error (const std::string& message, const TokenLoc& loc)
{
++mErrors;
report (message, loc, ErrorMessage);
}
// Generate an error message for an unexpected EOF.
void ErrorHandler::endOfFile()
{
++mErrors;
report ("unexpected end of file", ErrorMessage);
}
// Remove all previous error/warning events
void ErrorHandler::reset()
{
mErrors = mWarnings = 0;
}
}

@ -0,0 +1,68 @@
#ifndef COMPILER_ERRORHANDLER_H_INCLUDED
#define COMPILER_ERRORHANDLER_H_INCLUDED
#include <string>
namespace Compiler
{
struct TokenLoc;
/// \brief Error handling
///
/// This class collects errors and provides an interface for reporting them to the user.
class ErrorHandler
{
int mWarnings;
int mErrors;
protected:
enum Type
{
WarningMessage, ErrorMessage
};
private:
// mutators
virtual void report (const std::string& message, const TokenLoc& loc, Type type) = 0;
///< Report error to the user.
virtual void report (const std::string& message, Type type) = 0;
///< Report a file related error
public:
ErrorHandler();
///< constructor
virtual ~ErrorHandler();
///< destructor
bool isGood() const;
///< Was compiling successful?
int countErrors() const;
///< Return number of errors
int countWarnings() const;
///< Return number of warnings
void warning (const std::string& message, const TokenLoc& loc);
///< Generate a warning message.
void error (const std::string& message, const TokenLoc& loc);
///< Generate an error message.
void endOfFile();
///< Generate an error message for an unexpected EOF.
virtual void reset();
///< Remove all previous error/warning events
};
}
#endif

@ -0,0 +1,32 @@
#ifndef COMPILER_EXCEPTION_H_INCLUDED
#define COMPILER_EXCEPTION_H_INCLUDED
#include <exception>
namespace Compiler
{
/// \brief Exception: Error while parsing the source
class SourceException : public std::exception
{
virtual const char *what() const throw() { return "compile error";}
///< Return error message
};
/// \brief Exception: File error
class FileException
{
virtual const char *what() const throw() { return "can't read file"; }
///< Return error message
};
/// \brief Exception: EOF condition encountered
class EOFException
{ virtual const char *what() const throw() { return "end of file"; }
///< Return error message
};
}
#endif

@ -0,0 +1,56 @@
#include "fileparser.hpp"
#include <iostream>
#include "tokenloc.hpp"
#include "scanner.hpp"
namespace Compiler
{
FileParser::FileParser (ErrorHandler& errorHandler, Context& context)
: Parser (errorHandler, context)
{}
bool FileParser::parseInt (int value, const TokenLoc& loc, Scanner& scanner)
{
std::cout << "integer: " << value << std::endl;
return true;
}
bool FileParser::parseFloat (float value, const TokenLoc& loc, Scanner& scanner)
{
std::cout << "float: " << value << std::endl;
return true;
}
bool FileParser::parseName (const std::string& name, const TokenLoc& loc,
Scanner& scanner)
{
std::cout << "name: " << name << std::endl;
return true;
}
bool FileParser::parseKeyword (int keyword, const TokenLoc& loc, Scanner& scanner)
{
std::cout << "keyword: " << loc.mLiteral << std::endl;
return true;
}
bool FileParser::parseSpecial (int code, const TokenLoc& loc, Scanner& scanner)
{
if (code==Scanner::S_newline)
std::cout << "newline" << std::endl;
else
std::cout << "special: " << loc.mLiteral << std::endl;
return true;
}
void FileParser::parseEOF (Scanner& scanner)
{
std::cout << "end of file" << std::endl;
}
}

@ -0,0 +1,40 @@
#ifndef COMPILER_FILEPARSER_H_INCLUDED
#define COMPILER_FILEPARSER_H_INCLUDED
#include "parser.hpp"
namespace Compiler
{
class FileParser : public Parser
{
public:
FileParser (ErrorHandler& errorHandler, Context& context);
virtual bool parseInt (int value, const TokenLoc& loc, Scanner& scanner);
///< Handle an int token.
/// \return fetch another token?
virtual bool parseFloat (float value, const TokenLoc& loc, Scanner& scanner);
///< Handle a double token.
/// \return fetch another token?
virtual bool parseName (const std::string& name, const TokenLoc& loc,
Scanner& scanner);
///< Handle a name token.
/// \return fetch another token?
virtual bool parseKeyword (int keyword, const TokenLoc& loc, Scanner& scanner);
///< Handle a keyword token.
/// \return fetch another token?
virtual bool parseSpecial (int code, const TokenLoc& loc, Scanner& scanner);
///< Handle a special character token.
/// \return fetch another token?
virtual void parseEOF (Scanner& scanner);
///< Handle EOF token.
};
}
#endif

@ -0,0 +1,126 @@
#include "parser.hpp"
#include "errorhandler.hpp"
#include "exception.hpp"
namespace Compiler
{
// Report the error and throw an exception.
void Parser::reportSeriousError (const std::string& message, const TokenLoc& loc)
{
mErrorHandler.error (message, loc);
throw SourceException();
}
// Report the error
void Parser::reportError (const std::string& message, const TokenLoc& loc)
{
mErrorHandler.error (message, loc);
}
// Report the warning without throwing an exception.
void Parser::reportWarning (const std::string& message, const TokenLoc& loc)
{
mErrorHandler.warning (message, loc);
}
// Report an unexpected EOF condition.
void Parser::reportEOF()
{
mErrorHandler.endOfFile();
throw EOFException();
}
// Return error handler
ErrorHandler& Parser::getErrorHandler()
{
return mErrorHandler;
}
// Return context
Context& Parser::getContext()
{
return mContext;
}
Parser::Parser (ErrorHandler& errorHandler, Context& context)
: mErrorHandler (errorHandler), mContext (context)
{}
// destructor
Parser::~Parser() {}
// Handle an int token.
// \return fetch another token?
//
// - Default-implementation: Report an error.
bool Parser::parseInt (int value, const TokenLoc& loc, Scanner& scanner)
{
reportSeriousError ("Unexpected numeric value", loc);
return false;
}
// Handle a float token.
// \return fetch another token?
//
// - Default-implementation: Report an error.
bool Parser::parseFloat (float value, const TokenLoc& loc, Scanner& scanner)
{
reportSeriousError ("Unexpected floating point value", loc);
return false;
}
// Handle a name token.
// \return fetch another token?
//
// - Default-implementation: Report an error.
bool Parser::parseName (const std::string& name, const TokenLoc& loc,
Scanner& scanner)
{
reportSeriousError ("Unexpected name", loc);
return false;
}
// Handle a keyword token.
// \return fetch another token?
//
// - Default-implementation: Report an error.
bool Parser::parseKeyword (int keyword, const TokenLoc& loc, Scanner& scanner)
{
reportSeriousError ("Unexpected keyword", loc);
return false;
}
// Handle a special character token.
// \return fetch another token?
//
// - Default-implementation: Report an error.
bool Parser::parseSpecial (int code, const TokenLoc& loc, Scanner& scanner)
{
reportSeriousError ("Unexpected special token", loc);
return false;
}
// Handle an EOF token.
//
// - Default-implementation: Report an error.
void Parser::parseEOF (Scanner& scanner)
{
reportEOF();
}
}

@ -0,0 +1,90 @@
#ifndef COMPILER_PARSER_H_INCLUDED
#define COMPILER_PARSER_H_INCLUDED
#include <string>
namespace Compiler
{
class Scanner;
class TokenLoc;
class ErrorHandler;
class Context;
/// \brief Parser base class
///
/// This class defines a callback-parser.
class Parser
{
ErrorHandler& mErrorHandler;
Context& mContext;
protected:
// mutators
void reportSeriousError (const std::string& message, const TokenLoc& loc);
///< Report the error and throw a exception.
void reportError (const std::string& message, const TokenLoc& loc);
///< Report the error
void reportWarning (const std::string& message, const TokenLoc& loc);
///< Report the warning without throwing an exception.
void reportEOF();
///< Report an unexpected EOF condition.
ErrorHandler& getErrorHandler();
///< Return error handler
Context& getContext();
///< Return context
public:
Parser (ErrorHandler& errorHandler, Context& context);
///< constructor
virtual ~Parser();
///< destructor
virtual bool parseInt (int value, const TokenLoc& loc, Scanner& scanner);
///< Handle an int token.
/// \return fetch another token?
///
/// - Default-implementation: Report an error.
virtual bool parseFloat (float value, const TokenLoc& loc, Scanner& scanner);
///< Handle a double token.
/// \return fetch another token?
///
/// - Default-implementation: Report an error.
virtual bool parseName (const std::string& name, const TokenLoc& loc,
Scanner& scanner);
///< Handle a name token.
/// \return fetch another token?
///
/// - Default-implementation: Report an error.
virtual bool parseKeyword (int keyword, const TokenLoc& loc, Scanner& scanner);
///< Handle a keyword token.
/// \return fetch another token?
///
/// - Default-implementation: Report an error.
virtual bool parseSpecial (int code, const TokenLoc& loc, Scanner& scanner);
///< Handle a special character token.
/// \return fetch another token?
///
/// - Default-implementation: Report an error.
virtual void parseEOF (Scanner& scanner);
///< Handle EOF token.
///
/// - Default-implementation: Report an error.
};
}
#endif

@ -0,0 +1,414 @@
#include "scanner.hpp"
#include <cctype>
#include <sstream>
#include <algorithm>
#include "exception.hpp"
#include "errorhandler.hpp"
#include "parser.hpp"
namespace Compiler
{
bool Scanner::get (char& c)
{
mStream.get (c);
if (!mStream.good())
return false;
mPrevLoc =mLoc;
if (c=='\n')
{
mLoc.mColumn = 0;
++mLoc.mLine;
mLoc.mLiteral.clear();
}
else
{
++mLoc.mColumn;
mLoc.mLiteral += c;
}
return true;
}
void Scanner::putback (char c)
{
mStream.putback (c);
mLoc = mPrevLoc;
}
bool Scanner::scanToken (Parser& parser)
{
char c;
if (!get (c))
{
parser.parseEOF (*this);
return false;
}
else if (c==';')
{
while (get (c) && c!='\n');
mLoc.mLiteral.clear();
return true;
}
else if (isWhitespace (c))
{
mLoc.mLiteral.clear();
return true;
}
else if (std::isdigit (c) || c=='-')
{
bool cont = false;
if (scanInt (c, parser, cont))
{
mLoc.mLiteral.clear();
return cont;
}
}
else if (std::isalpha (c) || c=='_' || c=='"')
{
bool cont = false;
if (scanName (c, parser, cont))
{
mLoc.mLiteral.clear();
return cont;
}
}
else if (c==13) // linux compatibility hack
{
return true;
}
else
{
bool cont = false;
if (scanSpecial (c, parser, cont))
{
mLoc.mLiteral.clear();
return cont;
}
}
TokenLoc loc (mLoc);
mLoc.mLiteral.clear();
mErrorHandler.error ("syntax error", loc);
throw SourceException();
}
bool Scanner::scanInt (char c, Parser& parser, bool& cont)
{
std::string value;
value += c;
bool empty = false;
bool error = false;
while (get (c))
{
if (std::isdigit (c))
{
value += c;
empty = false;
}
else if (std::isalpha (c) || c=='_')
error = true;
else if (c=='.' && !error)
{
return scanFloat (value, parser, cont);
}
else
{
putback (c);
break;
}
}
if (empty || error)
return false;
TokenLoc loc (mLoc);
mLoc.mLiteral.clear();
std::istringstream stream (value);
int intValue = 0;
stream >> intValue;
cont = parser.parseInt (intValue, loc, *this);
return true;
}
bool Scanner::scanFloat (const std::string& intValue, Parser& parser, bool& cont)
{
std::string value = intValue + ".";
char c;
bool empty = intValue.empty() || intValue=="-";
bool error = false;
while (get (c))
{
if (std::isdigit (c))
{
value += c;
empty = false;
}
else if (std::isalpha (c) || c=='_')
error = true;
else
{
putback (c);
break;
}
}
if (empty || error)
return false;
TokenLoc loc (mLoc);
mLoc.mLiteral.clear();
std::istringstream stream (value);
float floatValue = 0;
stream >> floatValue;
cont = parser.parseFloat (floatValue, loc, *this);
return true;
}
bool Scanner::scanName (char c, Parser& parser, bool& cont)
{
static const char *keywords[] =
{
"begin", "end",
"short", "long", "float",
"if", "endif", "else", "elseif",
"while", "endwhile",
"return",
"messagebox",
"set", "to",
0
};
std::string name;
if (!scanName (c, name))
return false;
TokenLoc loc (mLoc);
mLoc.mLiteral.clear();
if (name.size()>=2 && name[0]=='"' && name[name.size()-1]=='"')
{
name = name.substr (1, name.size()-2);
cont = parser.parseName (name, loc, *this);
return true;
}
int i = 0;
std::string lowerCase;
lowerCase.reserve (name.size());
std::transform (name.begin(), name.end(), std::back_inserter (lowerCase),
(int(*)(int)) std::tolower);
for (; keywords[i]; ++i)
if (lowerCase==keywords[i])
break;
cont =
keywords[i] ? parser.parseKeyword (i, loc, *this) : parser.parseName (name, loc, *this);
return true;
}
bool Scanner::scanName (char c, std::string& name)
{
bool first = false;
bool error = false;
name.clear();
putback (c);
while (get (c))
{
if (!name.empty() && name[0]=='"')
{
if (c=='"')
{
name += c;
break;
}
else if (c=='\\')
{
if (!get (c))
{
mErrorHandler.error ("incomplete escape sequence", mLoc);
break;
}
}
else if (c=='\n')
{
mErrorHandler.error ("incomplete string or name", mLoc);
break;
}
}
else if (!(c=='"' && name.empty()))
{
if (!(std::isalpha (c) || std::isdigit (c) || c=='_'))
{
putback (c);
break;
}
if (first && std::isdigit (c))
error = true;
}
name += c;
first = false;
}
return !error;
}
bool Scanner::scanSpecial (char c, Parser& parser, bool& cont)
{
int special = -1;
if (c=='\n')
special = S_newline;
else if (c=='(')
special = S_open;
else if (c==')')
special = S_close;
else if (c=='=')
{
if (get (c))
{
if (c=='=')
special = S_cmpEQ;
else
{
putback (c);
return false;
}
}
else
{
putback (c);
return false;
}
}
else if (c=='!')
{
if (get (c))
{
if (c=='=')
special = S_cmpNE;
else
{
putback (c);
return false;
}
}
else
return false;
}
else if (c=='-')
{
if (get (c))
{
if (c=='>')
special = S_ref;
else
{
putback (c);
special = S_minus;
}
}
else
special = S_minus;
}
else if (c=='<')
{
if (get (c))
{
if (c=='=')
special = S_cmpLE;
else
{
putback (c);
special = S_cmpLT;
}
}
else
special = S_cmpLT;
}
else if (c=='>')
{
if (get (c))
{
if (c=='=')
special = S_cmpGE;
else
{
putback (c);
special = S_cmpGT;
}
}
else
special = S_cmpGT;
}
else if (c==',')
special = S_comma;
else if (c=='+')
special = S_plus;
else if (c=='*')
special = S_mult;
else if (c=='/')
special = S_div;
else
return false;
TokenLoc loc (mLoc);
mLoc.mLiteral.clear();
cont = parser.parseSpecial (special, loc, *this);
return true;
}
bool Scanner::isWhitespace (char c)
{
return c==' ' || c=='\t';
}
// constructor
Scanner::Scanner (ErrorHandler& errorHandler, std::istream& inputStream)
: mErrorHandler (errorHandler), mStream (inputStream)
{
}
void Scanner::scan (Parser& parser)
{
while (scanToken (parser));
}
}

@ -0,0 +1,84 @@
#ifndef COMPILER_SCANNER_H_INCLUDED
#define COMPILER_SCANNER_H_INCLUDED
#include <string>
#include <iosfwd>
#include "tokenloc.hpp"
namespace Compiler
{
class ErrorHandler;
class Parser;
/// \brief Scanner
///
/// This class translate a char-stream to a token stream (delivered via
/// parser-callbacks).
class Scanner
{
ErrorHandler& mErrorHandler;
TokenLoc mLoc;
TokenLoc mPrevLoc;
std::istream& mStream;
public:
enum keyword
{
K_begin, K_end,
K_short, K_long, K_float,
K_if, K_endif, K_else, K_elseif,
K_while, K_endwhile,
K_return,
K_messageBox,
K_set, K_to
};
enum special
{
S_newline,
S_open, S_close,
S_cmpEQ, S_cmpNE, S_cmpLT, S_cmpLE, S_cmpGT, S_cmpGE,
S_plus, S_minus, S_mult, S_div,
S_comma,
S_ref
};
private:
// not implemented
Scanner (const Scanner&);
Scanner& operator= (const Scanner&);
bool get (char& c);
void putback (char c);
bool scanToken (Parser& parser);
bool scanInt (char c, Parser& parser, bool& cont);
bool scanFloat (const std::string& intValue, Parser& parser, bool& cont);
bool scanName (char c, Parser& parser, bool& cont);
bool scanName (char c, std::string& name);
bool scanSpecial (char c, Parser& parser, bool& cont);
static bool isWhitespace (char c);
public:
Scanner (ErrorHandler& errorHandler, std::istream& inputStream);
///< constructor
void scan (Parser& parser);
///< Scan a token and deliver it to the parser.
};
}
#endif

@ -0,0 +1,38 @@
#include "streamerrorhandler.hpp"
#include "tokenloc.hpp"
namespace Compiler
{
// Report error to the user.
void StreamErrorHandler::report (const std::string& message, const TokenLoc& loc,
Type type)
{
if (type==ErrorMessage)
mStream << "error ";
else
mStream << "warning ";
mStream
<< "line " << loc.mLine << ", column " << loc.mColumn
<< " (" << loc.mLiteral << ")" << std::endl
<< " " << message << std::endl;
}
// Report a file related error
void StreamErrorHandler::report (const std::string& message, Type type)
{
if (type==ErrorMessage)
mStream << "error ";
else
mStream << "warning ";
mStream
<< "file:" << std::endl
<< " " << message << std::endl;
}
StreamErrorHandler::StreamErrorHandler (std::ostream& ErrorStream) : mStream (ErrorStream) {} }

@ -0,0 +1,37 @@
#ifndef COMPILER_STREAMERRORHANDLER_H_INCLUDED
#define COMPILER_STREAMERRORHANDLER_H_INCLUDED
#include <ostream>
#include "errorhandler.hpp"
namespace Compiler
{
/// \brief Error handler implementation: Write errors into stream
class StreamErrorHandler : public ErrorHandler
{
std::ostream& mStream;
// not implemented
StreamErrorHandler (const StreamErrorHandler&);
StreamErrorHandler& operator= (const StreamErrorHandler&);
virtual void report (const std::string& message, const TokenLoc& loc, Type type);
///< Report error to the user.
virtual void report (const std::string& message, Type type);
///< Report a file related error
public:
// constructors
StreamErrorHandler (std::ostream& ErrorStream);
///< constructor
};
}
#endif

@ -0,0 +1,20 @@
#ifndef COMPILER_TOKENLOC_H_INCLUDED
#define COMPILER_TOKENLOC_H_INCLUDED
#include <string>
namespace Compiler
{
/// \brief Location of a token in a source file
struct TokenLoc
{
int mColumn;
int mLine;
std::string mLiteral;
TokenLoc() : mColumn (0), mLine (0), mLiteral ("") {}
};
}
#endif // TOKENLOC_H_INCLUDED
Loading…
Cancel
Save