1
0
Fork 0
mirror of https://github.com/OpenMW/openmw.git synced 2025-01-19 20:23:54 +00:00
openmw/components/esm3/esmreader.cpp
florent.teppe 65cdd489fb create a specific esm reader function for RefID to avoid allocation for string and then again for RefId
Fixed some types

removed useless header

applied clang format

fixed compile tests

fixed clang tidy, and closer to logic before this MR

Removed hardcoded refids

unless there is a returned value we don't use static RefIds
can use == between RefId and hardcoded string

Fix clang format

Fixed a few instances where std::string was used, when only const std::string& was needed

removed unused variable
2022-12-27 19:15:57 +01:00

423 lines
12 KiB
C++

#include "esmreader.hpp"
#include "readerscache.hpp"
#include <components/files/conversion.hpp>
#include <components/files/openfile.hpp>
#include <components/misc/strings/algorithm.hpp>
#include <filesystem>
#include <fstream>
#include <sstream>
#include <stdexcept>
namespace ESM
{
ESM_Context ESMReader::getContext()
{
// Update the file position before returning
mCtx.filePos = mEsm->tellg();
return mCtx;
}
ESMReader::ESMReader()
: mRecordFlags(0)
, mBuffer(50 * 1024)
, mEncoder(nullptr)
, mFileSize(0)
{
clearCtx();
mCtx.index = 0;
}
void ESMReader::restoreContext(const ESM_Context& rc)
{
// Reopen the file if necessary
if (mCtx.filename != rc.filename)
openRaw(rc.filename);
// Copy the data
mCtx = rc;
// Make sure we seek to the right place
mEsm->seekg(mCtx.filePos);
}
void ESMReader::close()
{
mEsm.reset();
clearCtx();
mHeader.blank();
}
void ESMReader::clearCtx()
{
mCtx.filename.clear();
mCtx.leftFile = 0;
mCtx.leftRec = 0;
mCtx.leftSub = 0;
mCtx.subCached = false;
mCtx.recName.clear();
mCtx.subName.clear();
}
void ESMReader::resolveParentFileIndices(ReadersCache& readers)
{
mCtx.parentFileIndices.clear();
for (const Header::MasterData& mast : getGameFiles())
{
const std::string& fname = mast.name;
int index = getIndex();
for (int i = 0; i < getIndex(); i++)
{
const ESM::ReadersCache::BusyItem reader = readers.get(static_cast<std::size_t>(i));
if (reader->getFileSize() == 0)
continue; // Content file in non-ESM format
const auto fnamecandidate = Files::pathToUnicodeString(reader->getName().filename());
if (Misc::StringUtils::ciEqual(fname, fnamecandidate))
{
index = i;
break;
}
}
mCtx.parentFileIndices.push_back(index);
}
}
void ESMReader::openRaw(std::unique_ptr<std::istream>&& stream, const std::filesystem::path& name)
{
close();
mEsm = std::move(stream);
mCtx.filename = name;
mEsm->seekg(0, mEsm->end);
mCtx.leftFile = mFileSize = mEsm->tellg();
mEsm->seekg(0, mEsm->beg);
}
void ESMReader::openRaw(const std::filesystem::path& filename)
{
openRaw(Files::openBinaryInputFileStream(filename), filename);
}
void ESMReader::open(std::unique_ptr<std::istream>&& stream, const std::filesystem::path& name)
{
openRaw(std::move(stream), name);
if (getRecName() != "TES3")
fail("Not a valid Morrowind file");
getRecHeader();
mHeader.load(*this);
}
void ESMReader::open(const std::filesystem::path& file)
{
open(Files::openBinaryInputFileStream(file), file);
}
std::string ESMReader::getHNOString(NAME name)
{
if (isNextSub(name))
return getHString();
return "";
}
ESM::RefId ESMReader::getHNORefId(NAME name)
{
if (isNextSub(name))
return getRefId();
return ESM::RefId::sEmpty;
}
void ESMReader::skipHNOString(NAME name)
{
if (isNextSub(name))
skipHString();
}
std::string ESMReader::getHNString(NAME name)
{
getSubNameIs(name);
return getHString();
}
std::string ESMReader::getHString()
{
getSubHeader();
// Hack to make MultiMark.esp load. Zero-length strings do not
// occur in any of the official mods, but MultiMark makes use of
// them. For some reason, they break the rules, and contain a byte
// (value 0) even if the header says there is no data. If
// Morrowind accepts it, so should we.
if (mCtx.leftSub == 0 && hasMoreSubs() && !mEsm->peek())
{
// Skip the following zero byte
mCtx.leftRec--;
char c;
getT(c);
return std::string();
}
return getString(mCtx.leftSub);
}
RefId ESMReader::getRefId()
{
getSubHeader();
// Hack to make MultiMark.esp load. Zero-length strings do not
// occur in any of the official mods, but MultiMark makes use of
// them. For some reason, they break the rules, and contain a byte
// (value 0) even if the header says there is no data. If
// Morrowind accepts it, so should we.
if (mCtx.leftSub == 0 && hasMoreSubs() && !mEsm->peek())
{
// Skip the following zero byte
mCtx.leftRec--;
char c;
getT(c);
return ESM::RefId::sEmpty;
}
return getRefId(mCtx.leftSub);
}
void ESMReader::skipHString()
{
getSubHeader();
// Hack to make MultiMark.esp load. Zero-length strings do not
// occur in any of the official mods, but MultiMark makes use of
// them. For some reason, they break the rules, and contain a byte
// (value 0) even if the header says there is no data. If
// Morrowind accepts it, so should we.
if (mCtx.leftSub == 0 && hasMoreSubs() && !mEsm->peek())
{
// Skip the following zero byte
mCtx.leftRec--;
skipT<char>();
return;
}
skip(mCtx.leftSub);
}
void ESMReader::getHExact(void* p, int size)
{
getSubHeader();
if (size != static_cast<int>(mCtx.leftSub))
reportSubSizeMismatch(size, mCtx.leftSub);
getExact(p, size);
}
// Read the given number of bytes from a named subrecord
void ESMReader::getHNExact(void* p, int size, NAME name)
{
getSubNameIs(name);
getHExact(p, size);
}
// Get the next subrecord name and check if it matches the parameter
void ESMReader::getSubNameIs(NAME name)
{
getSubName();
if (mCtx.subName != name)
fail("Expected subrecord " + name.toString() + " but got " + mCtx.subName.toString());
}
bool ESMReader::isNextSub(NAME name)
{
if (!hasMoreSubs())
return false;
getSubName();
// If the name didn't match, then mark the it as 'cached' so it's
// available for the next call to getSubName.
mCtx.subCached = (mCtx.subName != name);
// If subCached is false, then subName == name.
return !mCtx.subCached;
}
bool ESMReader::peekNextSub(NAME name)
{
if (!hasMoreSubs())
return false;
getSubName();
mCtx.subCached = true;
return mCtx.subName == name;
}
// Read subrecord name. This gets called a LOT, so I've optimized it
// slightly.
void ESMReader::getSubName()
{
// If the name has already been read, do nothing
if (mCtx.subCached)
{
mCtx.subCached = false;
return;
}
// reading the subrecord data anyway.
const std::size_t subNameSize = decltype(mCtx.subName)::sCapacity;
getExact(mCtx.subName.mData, static_cast<int>(subNameSize));
mCtx.leftRec -= static_cast<std::uint32_t>(subNameSize);
}
void ESMReader::skipHSub()
{
getSubHeader();
skip(mCtx.leftSub);
}
void ESMReader::skipHSubSize(int size)
{
skipHSub();
if (static_cast<int>(mCtx.leftSub) != size)
reportSubSizeMismatch(mCtx.leftSub, size);
}
void ESMReader::skipHSubUntil(NAME name)
{
while (hasMoreSubs() && !isNextSub(name))
{
mCtx.subCached = false;
skipHSub();
}
if (hasMoreSubs())
mCtx.subCached = true;
}
void ESMReader::getSubHeader()
{
if (mCtx.leftRec < sizeof(mCtx.leftSub))
fail("End of record while reading sub-record header");
// Get subrecord size
getT(mCtx.leftSub);
mCtx.leftRec -= sizeof(mCtx.leftSub);
// Adjust number of record bytes left
if (mCtx.leftRec < mCtx.leftSub)
fail("Record size is larger than rest of file");
mCtx.leftRec -= mCtx.leftSub;
}
NAME ESMReader::getRecName()
{
if (!hasMoreRecs())
fail("No more records, getRecName() failed");
getName(mCtx.recName);
mCtx.leftFile -= decltype(mCtx.recName)::sCapacity;
// Make sure we don't carry over any old cached subrecord
// names. This can happen in some cases when we skip parts of a
// record.
mCtx.subCached = false;
return mCtx.recName;
}
void ESMReader::skipRecord()
{
skip(mCtx.leftRec);
mCtx.leftRec = 0;
mCtx.subCached = false;
}
void ESMReader::getRecHeader(uint32_t& flags)
{
// General error checking
if (mCtx.leftFile < 3 * sizeof(uint32_t))
fail("End of file while reading record header");
if (mCtx.leftRec)
fail("Previous record contains unread bytes");
getUint(mCtx.leftRec);
getUint(flags); // This header entry is always zero
getUint(flags);
mCtx.leftFile -= 3 * sizeof(uint32_t);
// Check that sizes add up
if (mCtx.leftFile < mCtx.leftRec)
reportSubSizeMismatch(mCtx.leftFile, mCtx.leftRec);
// Adjust number of bytes mCtx.left in file
mCtx.leftFile -= mCtx.leftRec;
}
/*************************************************************************
*
* Lowest level data reading and misc methods
*
*************************************************************************/
std::string ESMReader::getString(int size)
{
size_t s = size;
if (mBuffer.size() <= s)
// Add some extra padding to reduce the chance of having to resize
// again later.
mBuffer.resize(3 * s);
// And make sure the string is zero terminated
mBuffer[s] = 0;
// read ESM data
char* ptr = mBuffer.data();
getExact(ptr, size);
size = static_cast<int>(strnlen(ptr, size));
// Convert to UTF8 and return
if (mEncoder)
return std::string(mEncoder->getUtf8(std::string_view(ptr, size)));
return std::string(ptr, size);
}
ESM::RefId ESMReader::getRefId(int size)
{
size_t s = size;
if (mBuffer.size() <= s)
// Add some extra padding to reduce the chance of having to resize
// again later.
mBuffer.resize(3 * s);
// And make sure the string is zero terminated
mBuffer[s] = 0;
// read ESM data
char* ptr = mBuffer.data();
getExact(ptr, size);
size = static_cast<int>(strnlen(ptr, size));
// Convert to UTF8 and return
if (mEncoder)
return ESM::RefId::stringRefId(mEncoder->getUtf8(std::string_view(ptr, size)));
return ESM::RefId::stringRefId(std::string_view(ptr, size));
}
[[noreturn]] void ESMReader::fail(const std::string& msg)
{
std::stringstream ss;
ss << "ESM Error: " << msg;
ss << "\n File: " << Files::pathToUnicodeString(mCtx.filename);
ss << "\n Record: " << mCtx.recName.toStringView();
ss << "\n Subrecord: " << mCtx.subName.toStringView();
if (mEsm.get())
ss << "\n Offset: 0x" << std::hex << mEsm->tellg();
throw std::runtime_error(ss.str());
}
}