From c0c723bb1b2f01af0c5c0ab3d8aacd814c5fa83f Mon Sep 17 00:00:00 2001 From: elsid Date: Sun, 12 Feb 2023 17:04:06 +0100 Subject: [PATCH 1/3] Add const to read only function --- components/esm3/esmwriter.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/esm3/esmwriter.hpp b/components/esm3/esmwriter.hpp index 28fb4859d0..c629685eba 100644 --- a/components/esm3/esmwriter.hpp +++ b/components/esm3/esmwriter.hpp @@ -44,7 +44,7 @@ namespace ESM // Counts how many records we have actually written. // It is a good idea to compare this with the value you wrote into the header (setRecordCount) // It should be the record count you set + 1 (1 additional record for the TES3 header) - int getRecordCount() { return mRecordCount; } + int getRecordCount() const { return mRecordCount; } void setFormatVersion(FormatVersion value); void clearMaster(); From beb017e699e5c421dd47eb854665d9bc45b00bad Mon Sep 17 00:00:00 2001 From: elsid Date: Sat, 11 Feb 2023 00:01:59 +0100 Subject: [PATCH 2/3] Do not truncate too long strings on writing ESM --- apps/openmw_test_suite/CMakeLists.txt | 1 + apps/openmw_test_suite/esm3/testesmwriter.cpp | 54 +++++++++++++++++++ components/esm3/esmwriter.cpp | 5 +- components/esm3/esmwriter.hpp | 2 +- 4 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 apps/openmw_test_suite/esm3/testesmwriter.cpp diff --git a/apps/openmw_test_suite/CMakeLists.txt b/apps/openmw_test_suite/CMakeLists.txt index 0a487e1022..4fe4cde9d0 100644 --- a/apps/openmw_test_suite/CMakeLists.txt +++ b/apps/openmw_test_suite/CMakeLists.txt @@ -91,6 +91,7 @@ file(GLOB UNITTEST_SRC_FILES esm3/readerscache.cpp esm3/testsaveload.cpp + esm3/testesmwriter.cpp nifosg/testnifloader.cpp ) diff --git a/apps/openmw_test_suite/esm3/testesmwriter.cpp b/apps/openmw_test_suite/esm3/testesmwriter.cpp new file mode 100644 index 0000000000..7ab727bf01 --- /dev/null +++ b/apps/openmw_test_suite/esm3/testesmwriter.cpp @@ -0,0 +1,54 @@ +#include + +#include +#include + +#include +#include +#include + +namespace ESM +{ + namespace + { + using namespace ::testing; + + struct Esm3EsmWriterTest : public Test + { + std::minstd_rand mRandom; + std::uniform_int_distribution mRefIdDistribution{ 'a', 'z' }; + + std::string generateRandomString(std::size_t size) + { + std::string result; + std::generate_n( + std::back_inserter(result), size, [&] { return static_cast(mRefIdDistribution(mRandom)); }); + return result; + } + }; + + TEST_F(Esm3EsmWriterTest, saveShouldThrowExceptionOnWhenTruncatingHeaderStrings) + { + const std::string author = generateRandomString(33); + const std::string description = generateRandomString(257); + + std::stringstream stream; + + ESMWriter writer; + writer.setAuthor(author); + writer.setDescription(description); + writer.setFormatVersion(CurrentSaveGameFormatVersion); + EXPECT_THROW(writer.save(stream), std::runtime_error); + } + + TEST_F(Esm3EsmWriterTest, writeFixedStringShouldThrowExceptionOnTruncate) + { + std::stringstream stream; + + ESMWriter writer; + writer.setFormatVersion(CurrentSaveGameFormatVersion); + writer.save(stream); + EXPECT_THROW(writer.writeFixedSizeString(generateRandomString(33), 32), std::runtime_error); + } + } +} diff --git a/components/esm3/esmwriter.cpp b/components/esm3/esmwriter.cpp index b03b5bceb3..32fd2b0c86 100644 --- a/components/esm3/esmwriter.cpp +++ b/components/esm3/esmwriter.cpp @@ -167,11 +167,14 @@ namespace ESM endRecord(name); } - void ESMWriter::writeFixedSizeString(const std::string& data, int size) + void ESMWriter::writeFixedSizeString(const std::string& data, std::size_t size) { std::string string; if (!data.empty()) string = mEncoder ? mEncoder->getLegacyEnc(data) : data; + if (string.size() > size) + throw std::runtime_error("Fixed string data is too long: \"" + string + "\" (" + + std::to_string(string.size()) + " > " + std::to_string(size) + ")"); string.resize(size); write(string.c_str(), string.size()); } diff --git a/components/esm3/esmwriter.hpp b/components/esm3/esmwriter.hpp index c629685eba..d6b3a630ad 100644 --- a/components/esm3/esmwriter.hpp +++ b/components/esm3/esmwriter.hpp @@ -136,7 +136,7 @@ namespace ESM void startSubRecord(NAME name); void endRecord(NAME name); void endRecord(uint32_t name); - void writeFixedSizeString(const std::string& data, int size); + void writeFixedSizeString(const std::string& data, std::size_t size); void writeHString(const std::string& data); void writeHCString(const std::string& data); void writeName(NAME data); From 80e6d6cbe3a60412898a2d448d16c81c3b73a555 Mon Sep 17 00:00:00 2001 From: elsid Date: Sun, 12 Feb 2023 17:03:01 +0100 Subject: [PATCH 3/3] Support variable size strings in ESM3 --- apps/essimporter/importcellref.cpp | 2 +- apps/openmw_test_suite/esm3/testesmwriter.cpp | 6 +- apps/openmw_test_suite/esm3/testsaveload.cpp | 27 ++++++- components/esm/esmcommon.hpp | 2 + components/esm3/esmreader.cpp | 75 +++++++------------ components/esm3/esmreader.hpp | 6 +- components/esm3/esmwriter.cpp | 21 ++++-- components/esm3/esmwriter.hpp | 2 +- components/esm3/formatversion.hpp | 3 +- components/esm3/loadcont.cpp | 4 +- components/esm3/loadregn.cpp | 4 +- components/esm3/loadscpt.cpp | 4 +- components/esm3/loadtes3.cpp | 8 +- 13 files changed, 91 insertions(+), 73 deletions(-) diff --git a/apps/essimporter/importcellref.cpp b/apps/essimporter/importcellref.cpp index 0af765d4a8..8470fd7cf7 100644 --- a/apps/essimporter/importcellref.cpp +++ b/apps/essimporter/importcellref.cpp @@ -111,7 +111,7 @@ namespace ESSImport { // used power esm.getSubHeader(); - std::string id = esm.getString(32); + std::string id = esm.getMaybeFixedStringSize(32); (void)id; // timestamp can't be used: this is the total hours passed, calculated by // timestamp = 24 * (365 * year + cumulativeDays[month] + day) diff --git a/apps/openmw_test_suite/esm3/testesmwriter.cpp b/apps/openmw_test_suite/esm3/testesmwriter.cpp index 7ab727bf01..23922d7461 100644 --- a/apps/openmw_test_suite/esm3/testesmwriter.cpp +++ b/apps/openmw_test_suite/esm3/testesmwriter.cpp @@ -37,7 +37,7 @@ namespace ESM ESMWriter writer; writer.setAuthor(author); writer.setDescription(description); - writer.setFormatVersion(CurrentSaveGameFormatVersion); + writer.setFormatVersion(MaxLimitedSizeStringsFormatVersion); EXPECT_THROW(writer.save(stream), std::runtime_error); } @@ -46,9 +46,9 @@ namespace ESM std::stringstream stream; ESMWriter writer; - writer.setFormatVersion(CurrentSaveGameFormatVersion); + writer.setFormatVersion(MaxLimitedSizeStringsFormatVersion); writer.save(stream); - EXPECT_THROW(writer.writeFixedSizeString(generateRandomString(33), 32), std::runtime_error); + EXPECT_THROW(writer.writeMaybeFixedSizeString(generateRandomString(33), 32), std::runtime_error); } } } diff --git a/apps/openmw_test_suite/esm3/testsaveload.cpp b/apps/openmw_test_suite/esm3/testsaveload.cpp index d14c108c00..981c3f3ee3 100644 --- a/apps/openmw_test_suite/esm3/testsaveload.cpp +++ b/apps/openmw_test_suite/esm3/testsaveload.cpp @@ -17,6 +17,7 @@ namespace ESM using namespace ::testing; constexpr std::array formats = { + MaxLimitedSizeStringsFormatVersion, CurrentSaveGameFormatVersion, }; @@ -74,15 +75,37 @@ namespace ESM std::minstd_rand mRandom; std::uniform_int_distribution mRefIdDistribution{ 'a', 'z' }; - RefId generateRandomRefId(std::size_t size = 33) + std::string generateRandomString(std::size_t size) { std::string value; while (value.size() < size) value.push_back(static_cast(mRefIdDistribution(mRandom))); - return RefId::stringRefId(value); + return value; } + + RefId generateRandomRefId(std::size_t size = 33) { return RefId::stringRefId(generateRandomString(size)); } }; + TEST_F(Esm3SaveLoadRecordTest, headerShouldNotChange) + { + const std::string author = generateRandomString(33); + const std::string description = generateRandomString(257); + + auto stream = std::make_unique(); + + ESMWriter writer; + writer.setAuthor(author); + writer.setDescription(description); + writer.setFormatVersion(CurrentSaveGameFormatVersion); + writer.save(*stream); + writer.close(); + + ESMReader reader; + reader.open(std::move(stream), "stream"); + EXPECT_EQ(reader.getAuthor(), author); + EXPECT_EQ(reader.getDesc(), description); + } + TEST_P(Esm3SaveLoadRecordTest, playerShouldNotChange) { std::minstd_rand random; diff --git a/components/esm/esmcommon.hpp b/components/esm/esmcommon.hpp index b2da3a7f41..e92ae06806 100644 --- a/components/esm/esmcommon.hpp +++ b/components/esm/esmcommon.hpp @@ -28,6 +28,8 @@ namespace ESM FLAG_Blocked = 0x00002000 }; + using StringSizeType = std::uint32_t; + template struct FixedString { diff --git a/components/esm3/esmreader.cpp b/components/esm3/esmreader.cpp index a6ed122ed5..453b96a286 100644 --- a/components/esm3/esmreader.cpp +++ b/components/esm3/esmreader.cpp @@ -144,6 +144,11 @@ namespace ESM } std::string ESMReader::getHString() + { + return std::string(getHStringView()); + } + + std::string_view ESMReader::getHStringView() { getSubHeader(); @@ -158,31 +163,15 @@ namespace ESM mCtx.leftRec--; char c; getT(c); - return std::string(); + return std::string_view(); } - return getString(mCtx.leftSub); + return getStringView(mCtx.leftSub); } RefId ESMReader::getRefId() { - getSubHeader(); - - // Hack to make MultiMark.esp load. Zero-length strings do not - // occur in any of the official mods, but MultiMark makes use of - // them. For some reason, they break the rules, and contain a byte - // (value 0) even if the header says there is no data. If - // Morrowind accepts it, so should we. - if (mCtx.leftSub == 0 && hasMoreSubs() && !mEsm->peek()) - { - // Skip the following zero byte - mCtx.leftRec--; - char c; - getT(c); - return ESM::RefId::sEmpty; - } - - return getRefId(mCtx.leftSub); + return ESM::RefId::stringRefId(getHStringView()); } void ESMReader::skipHString() @@ -363,52 +352,42 @@ namespace ESM * *************************************************************************/ - std::string ESMReader::getString(int size) + std::string ESMReader::getMaybeFixedStringSize(std::size_t size) { - size_t s = size; - if (mBuffer.size() <= s) - // Add some extra padding to reduce the chance of having to resize - // again later. - mBuffer.resize(3 * s); - - // And make sure the string is zero terminated - mBuffer[s] = 0; - - // read ESM data - char* ptr = mBuffer.data(); - getExact(ptr, size); - - size = static_cast(strnlen(ptr, size)); - - // Convert to UTF8 and return - if (mEncoder) - return std::string(mEncoder->getUtf8(std::string_view(ptr, size))); + if (mHeader.mFormatVersion > MaxLimitedSizeStringsFormatVersion) + { + StringSizeType storedSize = 0; + getT(storedSize); + if (storedSize > mCtx.leftSub) + fail("String does not fit subrecord (" + std::to_string(storedSize) + " > " + + std::to_string(mCtx.leftSub) + ")"); + size = static_cast(storedSize); + } - return std::string(ptr, size); + return std::string(getStringView(size)); } - ESM::RefId ESMReader::getRefId(int size) + std::string_view ESMReader::getStringView(std::size_t size) { - size_t s = size; - if (mBuffer.size() <= s) + if (mBuffer.size() <= size) // Add some extra padding to reduce the chance of having to resize // again later. - mBuffer.resize(3 * s); + mBuffer.resize(3 * size); // And make sure the string is zero terminated - mBuffer[s] = 0; + mBuffer[size] = 0; // read ESM data char* ptr = mBuffer.data(); getExact(ptr, size); - size = static_cast(strnlen(ptr, size)); + size = strnlen(ptr, size); // Convert to UTF8 and return - if (mEncoder) - return ESM::RefId::stringRefId(mEncoder->getUtf8(std::string_view(ptr, size))); + if (mEncoder != nullptr) + return mEncoder->getUtf8(std::string_view(ptr, size)); - return ESM::RefId::stringRefId(std::string_view(ptr, size)); + return std::string_view(ptr, size); } [[noreturn]] void ESMReader::fail(const std::string& msg) diff --git a/components/esm3/esmreader.hpp b/components/esm3/esmreader.hpp index b398b4aee5..ecd17777f5 100644 --- a/components/esm3/esmreader.hpp +++ b/components/esm3/esmreader.hpp @@ -174,6 +174,7 @@ namespace ESM // Read a string, including the sub-record header (but not the name) std::string getHString(); + std::string_view getHStringView(); RefId getRefId(); void skipHString(); @@ -269,10 +270,11 @@ namespace ESM void getName(NAME& name) { getT(name); } void getUint(uint32_t& u) { getT(u); } + std::string getMaybeFixedStringSize(std::size_t size); + // Read the next 'size' bytes and return them as a string. Converts // them from native encoding to UTF8 in the process. - std::string getString(int size); - ESM::RefId getRefId(int size); + std::string_view getStringView(std::size_t size); void skip(std::size_t bytes) { diff --git a/components/esm3/esmwriter.cpp b/components/esm3/esmwriter.cpp index 32fd2b0c86..022b00f93f 100644 --- a/components/esm3/esmwriter.cpp +++ b/components/esm3/esmwriter.cpp @@ -167,15 +167,26 @@ namespace ESM endRecord(name); } - void ESMWriter::writeFixedSizeString(const std::string& data, std::size_t size) + void ESMWriter::writeMaybeFixedSizeString(const std::string& data, std::size_t size) { std::string string; if (!data.empty()) string = mEncoder ? mEncoder->getLegacyEnc(data) : data; - if (string.size() > size) - throw std::runtime_error("Fixed string data is too long: \"" + string + "\" (" - + std::to_string(string.size()) + " > " + std::to_string(size) + ")"); - string.resize(size); + if (mHeader.mFormatVersion <= MaxLimitedSizeStringsFormatVersion) + { + if (string.size() > size) + throw std::runtime_error("Fixed string data is too long: \"" + string + "\" (" + + std::to_string(string.size()) + " > " + std::to_string(size) + ")"); + string.resize(size); + } + else + { + constexpr StringSizeType maxSize = std::numeric_limits::max(); + if (string.size() > maxSize) + throw std::runtime_error("String size is too long: \"" + string.substr(0, 64) + "<...>\" (" + + std::to_string(string.size()) + " > " + std::to_string(maxSize) + ")"); + writeT(static_cast(string.size())); + } write(string.c_str(), string.size()); } diff --git a/components/esm3/esmwriter.hpp b/components/esm3/esmwriter.hpp index d6b3a630ad..6d22b70879 100644 --- a/components/esm3/esmwriter.hpp +++ b/components/esm3/esmwriter.hpp @@ -136,7 +136,7 @@ namespace ESM void startSubRecord(NAME name); void endRecord(NAME name); void endRecord(uint32_t name); - void writeFixedSizeString(const std::string& data, std::size_t size); + void writeMaybeFixedSizeString(const std::string& data, std::size_t size); void writeHString(const std::string& data); void writeHCString(const std::string& data); void writeName(NAME data); diff --git a/components/esm3/formatversion.hpp b/components/esm3/formatversion.hpp index 888229ebce..47c08988a7 100644 --- a/components/esm3/formatversion.hpp +++ b/components/esm3/formatversion.hpp @@ -19,7 +19,8 @@ namespace ESM inline constexpr FormatVersion MaxOldAiPackageFormatVersion = 17; inline constexpr FormatVersion MaxOldSkillsAndAttributesFormatVersion = 18; inline constexpr FormatVersion MaxOldCreatureStatsFormatVersion = 19; - inline constexpr FormatVersion CurrentSaveGameFormatVersion = 22; + inline constexpr FormatVersion MaxLimitedSizeStringsFormatVersion = 22; + inline constexpr FormatVersion CurrentSaveGameFormatVersion = 23; } #endif diff --git a/components/esm3/loadcont.cpp b/components/esm3/loadcont.cpp index f52cfbc557..d247cba07b 100644 --- a/components/esm3/loadcont.cpp +++ b/components/esm3/loadcont.cpp @@ -12,7 +12,7 @@ namespace ESM esm.getSubHeader(); ContItem ci; esm.getT(ci.mCount); - ci.mItem = ESM::RefId::stringRefId(esm.getString(32)); + ci.mItem = ESM::RefId::stringRefId(esm.getMaybeFixedStringSize(32)); mList.push_back(ci); } @@ -22,7 +22,7 @@ namespace ESM { esm.startSubRecord("NPCO"); esm.writeT(it->mCount); - esm.writeFixedSizeString(it->mItem.getRefIdString(), 32); + esm.writeMaybeFixedSizeString(it->mItem.getRefIdString(), 32); esm.endRecord("NPCO"); } } diff --git a/components/esm3/loadregn.cpp b/components/esm3/loadregn.cpp index a4470eac9f..84b448dabc 100644 --- a/components/esm3/loadregn.cpp +++ b/components/esm3/loadregn.cpp @@ -53,7 +53,7 @@ namespace ESM { esm.getSubHeader(); SoundRef sr; - sr.mSound = ESM::RefId::stringRefId(esm.getString(32)); + sr.mSound = ESM::RefId::stringRefId(esm.getMaybeFixedStringSize(32)); esm.getT(sr.mChance); mSoundList.push_back(sr); break; @@ -95,7 +95,7 @@ namespace ESM for (std::vector::const_iterator it = mSoundList.begin(); it != mSoundList.end(); ++it) { esm.startSubRecord("SNAM"); - esm.writeFixedSizeString(it->mSound.getRefIdString(), 32); + esm.writeMaybeFixedSizeString(it->mSound.getRefIdString(), 32); esm.writeT(it->mChance); esm.endRecord("SNAM"); } diff --git a/components/esm3/loadscpt.cpp b/components/esm3/loadscpt.cpp index dbba62d0cc..df4459d85b 100644 --- a/components/esm3/loadscpt.cpp +++ b/components/esm3/loadscpt.cpp @@ -98,7 +98,7 @@ namespace ESM case fourCC("SCHD"): { esm.getSubHeader(); - mId = ESM::RefId::stringRefId(esm.getString(32)); + mId = ESM::RefId::stringRefId(esm.getMaybeFixedStringSize(32)); esm.getT(mData); hasHeader = true; @@ -152,7 +152,7 @@ namespace ESM varNameString.append(*it); esm.startSubRecord("SCHD"); - esm.writeFixedSizeString(mId.getRefIdString(), 32); + esm.writeMaybeFixedSizeString(mId.getRefIdString(), 32); esm.writeT(mData, 20); esm.endRecord("SCHD"); diff --git a/components/esm3/loadtes3.cpp b/components/esm3/loadtes3.cpp index 2bd55ad84b..b80f8af0cb 100644 --- a/components/esm3/loadtes3.cpp +++ b/components/esm3/loadtes3.cpp @@ -30,8 +30,8 @@ namespace ESM esm.getSubHeader(); esm.getT(mData.version); esm.getT(mData.type); - mData.author.assign(esm.getString(32)); - mData.desc.assign(esm.getString(256)); + mData.author.assign(esm.getMaybeFixedStringSize(32)); + mData.desc.assign(esm.getMaybeFixedStringSize(256)); esm.getT(mData.records); } @@ -71,8 +71,8 @@ namespace ESM esm.startSubRecord("HEDR"); esm.writeT(mData.version); esm.writeT(mData.type); - esm.writeFixedSizeString(mData.author, 32); - esm.writeFixedSizeString(mData.desc, 256); + esm.writeMaybeFixedSizeString(mData.author, 32); + esm.writeMaybeFixedSizeString(mData.desc, 256); esm.writeT(mData.records); esm.endRecord("HEDR");