You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
openmw/components/esm3/esmreader.cpp

531 lines
15 KiB
C++

#include "esmreader.hpp"
#include "readerscache.hpp"
#include <components/esm3/cellid.hpp>
#include <components/esm3/loadcell.hpp>
#include <components/files/conversion.hpp>
#include <components/files/openfile.hpp>
#include <components/misc/strings/algorithm.hpp>
#include <filesystem>
#include <fstream>
#include <sstream>
#include <stdexcept>
namespace ESM
{
ESM_Context ESMReader::getContext()
{
// Update the file position before returning
mCtx.filePos = mEsm->tellg();
return mCtx;
}
ESMReader::ESMReader()
: mRecordFlags(0)
, mBuffer(50 * 1024)
, mEncoder(nullptr)
, mFileSize(0)
{
clearCtx();
mCtx.index = 0;
}
void ESMReader::restoreContext(const ESM_Context& rc)
{
// Reopen the file if necessary
if (mCtx.filename != rc.filename)
openRaw(rc.filename);
// Copy the data
mCtx = rc;
// Make sure we seek to the right place
mEsm->seekg(mCtx.filePos);
}
void ESMReader::close()
{
mEsm.reset();
clearCtx();
mHeader.blank();
}
void ESMReader::clearCtx()
{
mCtx.filename.clear();
mCtx.leftFile = 0;
mCtx.leftRec = 0;
mCtx.leftSub = 0;
mCtx.subCached = false;
mCtx.recName.clear();
mCtx.subName.clear();
}
void ESMReader::resolveParentFileIndices(ReadersCache& readers)
{
mCtx.parentFileIndices.clear();
for (const Header::MasterData& mast : getGameFiles())
{
const std::string& fname = mast.name;
int index = getIndex();
for (int i = 0; i < getIndex(); i++)
{
const ESM::ReadersCache::BusyItem reader = readers.get(static_cast<std::size_t>(i));
if (reader->getFileSize() == 0)
continue; // Content file in non-ESM format
const auto fnamecandidate = Files::pathToUnicodeString(reader->getName().filename());
if (Misc::StringUtils::ciEqual(fname, fnamecandidate))
{
index = i;
break;
}
}
mCtx.parentFileIndices.push_back(index);
}
}
bool ESMReader::applyContentFileMapping(FormId& id)
{
if (mContentFileMapping && id.hasContentFile())
{
auto iter = mContentFileMapping->find(id.mContentFile);
if (iter == mContentFileMapping->end())
return false;
id.mContentFile = iter->second;
}
return true;
}
ESM::RefId ESMReader::getCellId()
{
if (mHeader.mFormatVersion <= ESM::MaxUseEsmCellIdFormatVersion)
{
ESM::CellId cellId;
cellId.load(*this);
if (cellId.mPaged)
{
return ESM::RefId::esm3ExteriorCell(cellId.mIndex.mX, cellId.mIndex.mY);
}
else
{
return ESM::RefId::stringRefId(cellId.mWorldspace);
}
}
return getHNRefId("NAME");
}
void ESMReader::openRaw(std::unique_ptr<std::istream>&& stream, const std::filesystem::path& name)
{
close();
mEsm = std::move(stream);
mCtx.filename = name;
mEsm->seekg(0, mEsm->end);
mCtx.leftFile = mFileSize = mEsm->tellg();
mEsm->seekg(0, mEsm->beg);
}
void ESMReader::openRaw(const std::filesystem::path& filename)
{
openRaw(Files::openBinaryInputFileStream(filename), filename);
}
void ESMReader::open(std::unique_ptr<std::istream>&& stream, const std::filesystem::path& name)
{
openRaw(std::move(stream), name);
if (getRecName() != "TES3")
fail("Not a valid Morrowind file");
getRecHeader();
mHeader.load(*this);
}
void ESMReader::open(const std::filesystem::path& file)
{
open(Files::openBinaryInputFileStream(file), file);
}
std::string ESMReader::getHNOString(NAME name)
{
if (isNextSub(name))
return getHString();
return {};
}
ESM::RefId ESMReader::getHNORefId(NAME name)
{
if (isNextSub(name))
return getRefId();
return ESM::RefId();
}
void ESMReader::skipHNORefId(NAME name)
{
if (isNextSub(name))
skipHRefId();
}
std::string ESMReader::getHNString(NAME name)
{
getSubNameIs(name);
return getHString();
}
RefId ESMReader::getHNRefId(NAME name)
{
getSubNameIs(name);
return getRefId();
}
std::string ESMReader::getHString()
{
return std::string(getHStringView());
}
std::string_view ESMReader::getHStringView()
{
getSubHeader();
if (mHeader.mFormatVersion > MaxStringRefIdFormatVersion)
return getStringView(mCtx.leftSub);
// Hack to make MultiMark.esp load. Zero-length strings do not
// occur in any of the official mods, but MultiMark makes use of
// them. For some reason, they break the rules, and contain a byte
// (value 0) even if the header says there is no data. If
// Morrowind accepts it, so should we.
if (mCtx.leftSub == 0 && hasMoreSubs() && !mEsm->peek())
{
// Skip the following zero byte
mCtx.leftRec--;
char c;
getT(c);
return std::string_view();
}
return getStringView(mCtx.leftSub);
}
RefId ESMReader::getRefId()
{
if (mHeader.mFormatVersion <= MaxStringRefIdFormatVersion)
return ESM::RefId::stringRefId(getHStringView());
getSubHeader();
return getRefIdImpl(mCtx.leftSub);
}
Initial commit: In ESM structures, replace the string members that are RefIds to other records, to a new strong type The strong type is actually just a string underneath, but this will help in the future to have a distinction so it's easier to search and replace when we use an integer ID Slowly going through all the changes to make, still hundreds of errors a lot of functions/structures use std::string or stringview to designate an ID. So it takes time Continues slowly replacing ids. There are technically more and more compilation errors I have good hope that there is a point where the amount of errors will dramatically go down as all the main functions use the ESM::RefId type Continue moving forward, changes to the stores slowly moving along Starting to see the fruit of those changes. still many many error, but more and more Irun into a situation where a function is sandwiched between two functions that use the RefId type. More replacements. Things are starting to get easier I can see more and more often the issue is that the function is awaiting a RefId, but is given a string there is less need to go down functions and to fix a long list of them. Still moving forward, and for the first time error count is going down! Good pace, not sure about topics though, mId and mName are actually the same thing and are used interchangeably Cells are back to using string for the name, haven't fixed everything yet. Many other changes Under the bar of 400 compilation errors. more good progress <100 compile errors! More progress Game settings store can use string for find, it was a bit absurd how every use of it required to create refId from string some more progress on other fronts Mostly game settings clean one error opened a lot of other errors. Down to 18, but more will prbably appear only link errors left?? Fixed link errors OpenMW compiles, and launches, with some issues, but still!
2 years ago
void ESMReader::skipHString()
{
getSubHeader();
// Hack to make MultiMark.esp load. Zero-length strings do not
// occur in any of the official mods, but MultiMark makes use of
// them. For some reason, they break the rules, and contain a byte
// (value 0) even if the header says there is no data. If
// Morrowind accepts it, so should we.
if (mHeader.mFormatVersion <= MaxStringRefIdFormatVersion && mCtx.leftSub == 0 && hasMoreSubs()
&& !mEsm->peek())
{
// Skip the following zero byte
mCtx.leftRec--;
skipT<char>();
return;
}
skip(mCtx.leftSub);
}
void ESMReader::skipHRefId()
{
skipHString();
}
FormId ESMReader::getFormId(bool wide, NAME tag)
{
FormId res;
if (wide)
getHNT(tag, res.mIndex, res.mContentFile);
else
getHNT(res.mIndex, tag);
return res;
}
// Get the next subrecord name and check if it matches the parameter
void ESMReader::getSubNameIs(NAME name)
{
getSubName();
if (mCtx.subName != name)
fail("Expected subrecord " + name.toString() + " but got " + mCtx.subName.toString());
}
bool ESMReader::isNextSub(NAME name)
{
if (!hasMoreSubs())
return false;
getSubName();
// If the name didn't match, then mark the it as 'cached' so it's
// available for the next call to getSubName.
mCtx.subCached = (mCtx.subName != name);
// If subCached is false, then subName == name.
return !mCtx.subCached;
}
bool ESMReader::peekNextSub(NAME name)
{
if (!hasMoreSubs())
return false;
getSubName();
mCtx.subCached = true;
return mCtx.subName == name;
}
// Read subrecord name. This gets called a LOT, so I've optimized it
// slightly.
void ESMReader::getSubName()
{
// If the name has already been read, do nothing
if (mCtx.subCached)
{
mCtx.subCached = false;
return;
}
// reading the subrecord data anyway.
const std::size_t subNameSize = decltype(mCtx.subName)::sCapacity;
getExact(mCtx.subName.mData, subNameSize);
mCtx.leftRec -= static_cast<std::uint32_t>(subNameSize);
}
void ESMReader::skipHSub()
{
getSubHeader();
skip(mCtx.leftSub);
}
10 months ago
void ESMReader::skipHSubSize(std::size_t size)
{
skipHSub();
10 months ago
if (mCtx.leftSub != size)
reportSubSizeMismatch(mCtx.leftSub, size);
}
void ESMReader::skipHSubUntil(NAME name)
{
while (hasMoreSubs() && !isNextSub(name))
{
mCtx.subCached = false;
skipHSub();
}
if (hasMoreSubs())
mCtx.subCached = true;
}
void ESMReader::getSubHeader()
{
if (mCtx.leftRec < static_cast<std::streamsize>(sizeof(mCtx.leftSub)))
fail("End of record while reading sub-record header: " + std::to_string(mCtx.leftRec) + " < "
+ std::to_string(sizeof(mCtx.leftSub)));
// Get subrecord size
getUint(mCtx.leftSub);
mCtx.leftRec -= sizeof(mCtx.leftSub);
// Adjust number of record bytes left; may go negative
mCtx.leftRec -= mCtx.leftSub;
}
NAME ESMReader::getRecName()
{
if (!hasMoreRecs())
fail("No more records, getRecName() failed");
if (hasMoreSubs())
fail("Previous record contains unread bytes");
// We went out of the previous record's bounds. Backtrack.
if (mCtx.leftRec < 0)
mEsm->seekg(mCtx.leftRec, std::ios::cur);
getName(mCtx.recName);
mCtx.leftFile -= decltype(mCtx.recName)::sCapacity;
// Make sure we don't carry over any old cached subrecord
// names. This can happen in some cases when we skip parts of a
// record.
mCtx.subCached = false;
return mCtx.recName;
}
void ESMReader::skipRecord()
{
skip(mCtx.leftRec);
mCtx.leftRec = 0;
mCtx.subCached = false;
}
void ESMReader::getRecHeader(uint32_t& flags)
{
if (mCtx.leftFile < static_cast<std::streamsize>(3 * sizeof(uint32_t)))
fail("End of file while reading record header");
std::uint32_t leftRec = 0;
getUint(leftRec);
mCtx.leftRec = static_cast<std::streamsize>(leftRec);
getUint(flags); // This header entry is always zero
getUint(flags);
mCtx.leftFile -= 3 * sizeof(uint32_t);
// Check that sizes add up
if (mCtx.leftFile < mCtx.leftRec)
reportSubSizeMismatch(mCtx.leftFile, mCtx.leftRec);
// Adjust number of bytes mCtx.left in file
mCtx.leftFile -= mCtx.leftRec;
}
/*************************************************************************
*
* Lowest level data reading and misc methods
*
*************************************************************************/
std::string ESMReader::getMaybeFixedStringSize(std::size_t size)
{
if (mHeader.mFormatVersion > MaxLimitedSizeStringsFormatVersion)
{
StringSizeType storedSize = 0;
getT(storedSize);
if (storedSize > mCtx.leftSub)
fail("String does not fit subrecord (" + std::to_string(storedSize) + " > "
+ std::to_string(mCtx.leftSub) + ")");
size = static_cast<std::size_t>(storedSize);
}
return std::string(getStringView(size));
}
RefId ESMReader::getMaybeFixedRefIdSize(std::size_t size)
{
if (mHeader.mFormatVersion <= MaxStringRefIdFormatVersion)
return RefId::stringRefId(getMaybeFixedStringSize(size));
return getRefIdImpl(mCtx.leftSub);
}
std::string_view ESMReader::getStringView(std::size_t size)
{
if (mBuffer.size() <= size)
// Add some extra padding to reduce the chance of having to resize
// again later.
mBuffer.resize(3 * size);
// And make sure the string is zero terminated
mBuffer[size] = 0;
// read ESM data
char* ptr = mBuffer.data();
getExact(ptr, size);
size = strnlen(ptr, size);
// Convert to UTF8 and return
if (mEncoder != nullptr)
return mEncoder->getUtf8(std::string_view(ptr, size));
return std::string_view(ptr, size);
}
RefId ESMReader::getRefId(std::size_t size)
{
if (mHeader.mFormatVersion <= MaxStringRefIdFormatVersion)
return ESM::RefId::stringRefId(getStringView(size));
return getRefIdImpl(size);
}
RefId ESMReader::getRefIdImpl(std::size_t size)
{
RefIdType refIdType = RefIdType::Empty;
getT(refIdType);
switch (refIdType)
{
case RefIdType::Empty:
return RefId();
case RefIdType::SizedString:
{
const std::size_t minSize = sizeof(refIdType) + sizeof(StringSizeType);
if (size < minSize)
fail("Requested RefId record size is too small (" + std::to_string(size) + " < "
+ std::to_string(minSize) + ")");
StringSizeType storedSize = 0;
getT(storedSize);
const std::size_t maxSize = size - minSize;
if (storedSize > maxSize)
fail("RefId string does not fit subrecord size (" + std::to_string(storedSize) + " > "
+ std::to_string(maxSize) + ")");
return RefId::stringRefId(getStringView(storedSize));
}
case RefIdType::UnsizedString:
if (size < sizeof(refIdType))
fail("Requested RefId record size is too small (" + std::to_string(size) + " < "
+ std::to_string(sizeof(refIdType)) + ")");
return RefId::stringRefId(getStringView(size - sizeof(refIdType)));
case RefIdType::FormId:
{
FormId formId{};
getT(formId.mIndex);
getT(formId.mContentFile);
if (applyContentFileMapping(formId))
return RefId(formId);
else
return RefId(); // content file was removed from load order
}
case RefIdType::Generated:
{
std::uint64_t generated{};
getT(generated);
return RefId::generated(generated);
}
case RefIdType::Index:
{
RecNameInts recordType{};
getExact(&recordType, sizeof(std::uint32_t));
std::uint32_t index{};
getT(index);
return RefId::index(recordType, index);
}
case RefIdType::ESM3ExteriorCell:
{
int32_t x, y;
getT(x);
getT(y);
return RefId::esm3ExteriorCell(x, y);
}
}
fail("Unsupported RefIdType: " + std::to_string(static_cast<unsigned>(refIdType)));
}
[[noreturn]] void ESMReader::fail(std::string_view msg)
{
std::stringstream ss;
ss << "ESM Error: " << msg;
ss << "\n File: " << Files::pathToUnicodeString(mCtx.filename);
ss << "\n Record: " << mCtx.recName.toStringView();
ss << "\n Subrecord: " << mCtx.subName.toStringView();
if (mEsm.get())
ss << "\n Offset: 0x" << std::hex << mEsm->tellg();
throw std::runtime_error(ss.str());
}
}