1
0
Fork 0
mirror of https://github.com/OpenMW/openmw.git synced 2025-10-15 21:46:37 +00:00

Merge branch 'fix_bsatool_afl_findings' into 'master'

Fix AFL findings in bsatool

See merge request OpenMW/openmw!4925
This commit is contained in:
Alexei Kotov 2025-10-11 08:57:48 +03:00
commit afe4edc3c3
15 changed files with 936 additions and 163 deletions

View file

@ -170,8 +170,8 @@ int list(std::unique_ptr<File>& bsa, Arguments& info)
// Long format
std::ios::fmtflags f(std::cout.flags());
std::cout << std::setw(50) << std::left << file.name();
std::cout << std::setw(8) << std::left << std::dec << file.fileSize;
std::cout << "@ 0x" << std::hex << file.offset << std::endl;
std::cout << std::setw(8) << std::left << std::dec << file.mFileSize;
std::cout << "@ 0x" << std::hex << file.mOffset << std::endl;
std::cout.flags(f);
}
else

View file

@ -89,6 +89,9 @@ file(GLOB UNITTEST_SRC_FILES
vfs/testpathutil.cpp
sceneutil/osgacontroller.cpp
bsa/testbsafile.cpp
bsa/testcompressedbsafile.cpp
)
source_group(apps\\components-tests FILES ${UNITTEST_SRC_FILES})

View file

@ -0,0 +1,40 @@
#ifndef COMPONETS_TESTS_BSA_OPERATORS_H
#define COMPONETS_TESTS_BSA_OPERATORS_H
#include <components/bsa/bsafile.hpp>
#include <ostream>
#include <tuple>
namespace Bsa
{
/// Flattens a hash into a (low, high) tuple so it can be compared member-wise.
inline auto makeTuple(const BSAFile::Hash& value)
{
    return std::tuple(value.mLow, value.mHigh);
}
/// Flattens a file entry into a tuple of its comparable parts.
/// The names buffer pointer itself is left out; the resolved name() is used instead, so two
/// entries backed by different buffers with the same name contents produce equal tuples.
inline auto makeTuple(const BSAFile::FileStruct& value)
{
    return std::tuple(value.mFileSize, value.mOffset, makeTuple(value.mHash), value.mNameOffset, value.mNameSize,
        value.name());
}
/// Prints a hash in a designated-initializer-like form for readable test failure messages.
inline std::ostream& operator<<(std::ostream& stream, const BSAFile::Hash& value)
{
    stream << "Hash { .mLow = " << value.mLow;
    stream << ", .mHigh = " << value.mHigh;
    stream << "}";
    return stream;
}
/// Prints every comparable member of a file entry (and its resolved name) for readable test
/// failure messages.
inline std::ostream& operator<<(std::ostream& stream, const BSAFile::FileStruct& value)
{
    stream << "FileStruct { .mFileSize = " << value.mFileSize;
    stream << ", .mOffset = " << value.mOffset;
    stream << ", .mHash = " << value.mHash;
    stream << ", .mNameOffset = " << value.mNameOffset;
    stream << ", .mNameSize = " << value.mNameSize;
    stream << ", .name() = " << value.name();
    stream << "}";
    return stream;
}
/// Two file entries are equal when their makeTuple() representations are equal.
inline bool operator==(const BSAFile::FileStruct& lhs, const BSAFile::FileStruct& rhs)
{
    const auto left = makeTuple(lhs);
    const auto right = makeTuple(rhs);
    return left == right;
}
}
#endif

View file

@ -0,0 +1,302 @@
#include "operators.hpp"
#include <components/bsa/compressedbsafile.hpp>
#include <components/files/memorystream.hpp>
#include <components/testing/util.hpp>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <cstdint>
#include <filesystem>
#include <format>
#include <fstream>
#include <sstream>
namespace Bsa
{
namespace
{
using namespace ::testing;
/// Header of an uncompressed BSA archive exactly as the test helpers write it to the stream:
/// three consecutive 32-bit fields. Member order must not change because the struct is written
/// as raw bytes and initialized with designated initializers.
struct Header
{
    /// Archive format identifier (the tests use BsaVersion::Uncompressed).
    std::uint32_t mFormat;
    /// Size in bytes of the offset table plus the names buffer that follow the header.
    std::uint32_t mDirSize;
    /// Number of file entries described by the archive.
    std::uint32_t mFileCount;
};
/// In-memory description of an uncompressed BSA test archive. writeArchive() serializes the
/// members to a stream in declaration order.
struct Archive
{
// Fixed-size header written first.
Header mHeader;
// Raw 32-bit table written after the header. The tests fill it with one (size, offset) pair per
// file followed by one name offset per file.
std::vector<std::uint32_t> mOffsets;
// Zero-terminated file names, written after the offset table.
std::vector<char> mStringBuffer;
// One hash per file, written after the names.
std::vector<BSAFile::Hash> mHashes;
// Number of zero bytes appended after the hashes to stand in for file contents.
std::size_t mTailSize;
};
/// Test-only subclass that makes readHeader(std::istream&) callable from test code, so the
/// header parser can be fed an arbitrary stream instead of a file on disk.
struct TestBSAFile final : public BSAFile
{
    /// Forwards to the base class implementation unchanged.
    void readHeader(std::istream& input) override
    {
        BSAFile::readHeader(input);
    }

    /// Writing is never expected in these tests; fail loudly if it is reached.
    void writeHeader() override
    {
        throw std::logic_error("TestBSAFile::writeHeader is not implemented");
    }
};
void writeArchive(const Archive& value, std::ostream& stream)
{
stream.write(reinterpret_cast<const char*>(&value.mHeader), sizeof(value.mHeader));
if (!value.mOffsets.empty())
stream.write(reinterpret_cast<const char*>(value.mOffsets.data()),
value.mOffsets.size() * sizeof(std::uint32_t));
if (!value.mStringBuffer.empty())
stream.write(reinterpret_cast<const char*>(value.mStringBuffer.data()), value.mStringBuffer.size());
for (const BSAFile::Hash& hash : value.mHashes)
stream.write(reinterpret_cast<const char*>(&hash), sizeof(BSAFile::Hash));
const std::size_t chunkSize = 4096;
std::vector<char> chunk(chunkSize);
for (std::size_t i = 0; i < value.mTailSize; i += chunkSize)
stream.write(reinterpret_cast<const char*>(chunk.data()), std::min(chunk.size(), value.mTailSize - i));
}
/// Builds an output path named "<suite>.<test>.bsa" from the currently running test, so every
/// test case writes to its own file.
std::filesystem::path makeOutputPath()
{
    const auto* const info = UnitTest::GetInstance()->current_test_info();
    const auto fileName = std::format("{}.{}.bsa", info->test_suite_name(), info->name());
    return TestingOpenMW::outputFilePath(fileName);
}
std::string makeBsaBuffer(std::uint32_t fileSize, std::uint32_t fileOffset)
{
std::string buffer;
buffer.reserve(static_cast<std::size_t>(fileSize) + static_cast<std::size_t>(fileOffset) + 34);
std::ostringstream stream(std::move(buffer));
const Header header{
.mFormat = static_cast<std::uint32_t>(BsaVersion::Uncompressed),
.mDirSize = 14,
.mFileCount = 1,
};
const BSAFile::Hash hash{
.mLow = 0xaaaabbbb,
.mHigh = 0xccccdddd,
};
const Archive archive{
.mHeader = header,
.mOffsets = { fileSize, fileOffset, 0 },
.mStringBuffer = { 'a', '\0' },
.mHashes = { hash },
.mTailSize = 0,
};
writeArchive(archive, stream);
return std::move(stream).str();
}
// A zero-length file must be rejected and leave the file list empty.
TEST(BSAFileTest, shouldHandleEmpty)
{
    const std::filesystem::path archivePath = makeOutputPath();

    // Create the file without writing anything into it.
    {
        std::ofstream output;
        output.exceptions(std::ios_base::failbit | std::ios_base::badbit);
        output.open(archivePath, std::ios_base::binary);
    }

    BSAFile bsa;
    EXPECT_THROW(bsa.open(archivePath), std::runtime_error);
    EXPECT_THAT(bsa.getList(), IsEmpty());
}
// An archive consisting of a header only (no directory, no files) opens with an empty list.
TEST(BSAFileTest, shouldHandleZeroFiles)
{
    const std::filesystem::path archivePath = makeOutputPath();

    {
        std::ofstream output;
        output.exceptions(std::ios_base::failbit | std::ios_base::badbit);
        output.open(archivePath, std::ios_base::binary);

        const Archive content{
            .mHeader = Header{
                .mFormat = static_cast<std::uint32_t>(BsaVersion::Uncompressed),
                .mDirSize = 0,
                .mFileCount = 0,
            },
            .mOffsets = {},
            .mStringBuffer = {},
            .mHashes = {},
            .mTailSize = 0,
        };
        writeArchive(content, output);
    }

    BSAFile bsa;
    bsa.open(archivePath);
    EXPECT_THAT(bsa.getList(), IsEmpty());
}
// One 42-byte file named "a": the stored relative offset 0 becomes absolute offset 34
// (12-byte header + 14-byte directory + 8-byte hash).
TEST(BSAFileTest, shouldHandleSingleFile)
{
    const std::filesystem::path archivePath = makeOutputPath();

    {
        std::ofstream output;
        output.exceptions(std::ios_base::failbit | std::ios_base::badbit);
        output.open(archivePath, std::ios_base::binary);

        const Archive content{
            .mHeader = Header{
                .mFormat = static_cast<std::uint32_t>(BsaVersion::Uncompressed),
                .mDirSize = 14,
                .mFileCount = 1,
            },
            .mOffsets = { 42, 0, 0 },
            .mStringBuffer = { 'a', '\0' },
            .mHashes = { BSAFile::Hash{ .mLow = 0xaaaabbbb, .mHigh = 0xccccdddd } },
            .mTailSize = 42,
        };
        writeArchive(content, output);
    }

    BSAFile bsa;
    bsa.open(archivePath);

    // Backing storage for the expected entry's name; must outlive the comparison below.
    std::vector<char> namesBuffer = { 'a', '\0' };
    const BSAFile::FileStruct expected{
        .mFileSize = 42,
        .mOffset = 34,
        .mHash = BSAFile::Hash{ .mLow = 0xaaaabbbb, .mHigh = 0xccccdddd },
        .mNameOffset = 0,
        .mNameSize = 1,
        .mNamesBuffer = &namesBuffer,
    };

    EXPECT_THAT(bsa.getList(), ElementsAre(expected));
}
// Two files "a" (42 bytes) and "b" (13 bytes) stored back to back: absolute offsets are the
// stored relative offsets plus 56 (12-byte header + 28-byte directory + two 8-byte hashes).
TEST(BSAFileTest, shouldHandleTwoFiles)
{
    const std::filesystem::path archivePath = makeOutputPath();

    {
        std::ofstream output;
        output.exceptions(std::ios_base::failbit | std::ios_base::badbit);
        output.open(archivePath, std::ios_base::binary);

        const std::uint32_t firstSize = 42;
        const std::uint32_t secondSize = 13;

        const Archive content{
            .mHeader = Header{
                .mFormat = static_cast<std::uint32_t>(BsaVersion::Uncompressed),
                .mDirSize = 28,
                .mFileCount = 2,
            },
            // (size, offset) for each file, then one name offset per file.
            .mOffsets = { firstSize, 0, secondSize, firstSize, 0, 2 },
            .mStringBuffer = { 'a', '\0', 'b', '\0' },
            .mHashes = {
                BSAFile::Hash{ .mLow = 0xaaaabbbb, .mHigh = 0xccccdddd },
                BSAFile::Hash{ .mLow = 0x11112222, .mHigh = 0x33334444 },
            },
            .mTailSize = firstSize + secondSize,
        };
        writeArchive(content, output);
    }

    BSAFile bsa;
    bsa.open(archivePath);

    // Backing storage for the expected entries' names; must outlive the comparison below.
    std::vector<char> namesBuffer = { 'a', '\0', 'b', '\0' };
    const BSAFile::FileStruct expectedFirst{
        .mFileSize = 42,
        .mOffset = 56,
        .mHash = BSAFile::Hash{ .mLow = 0xaaaabbbb, .mHigh = 0xccccdddd },
        .mNameOffset = 0,
        .mNameSize = 1,
        .mNamesBuffer = &namesBuffer,
    };
    const BSAFile::FileStruct expectedSecond{
        .mFileSize = 13,
        .mOffset = 98,
        .mHash = BSAFile::Hash{ .mLow = 0x11112222, .mHigh = 0x33334444 },
        .mNameOffset = 2,
        .mNameSize = 1,
        .mNamesBuffer = &namesBuffer,
    };

    EXPECT_THAT(bsa.getList(), ElementsAre(expectedFirst, expectedSecond));
}
// Largest representable case: the relative offset is chosen so that the absolute offset
// (relative offset + 34 bytes of metadata) is exactly the maximum 32-bit value.
TEST(BSAFileTest, shouldHandleSingleFileAtTheEndOfLargeFile)
{
    constexpr std::uint32_t maxUInt32 = std::numeric_limits<std::uint32_t>::max();
    const std::string buffer = makeBsaBuffer(maxUInt32, maxUInt32 - 34);

    TestBSAFile bsa;
    // Use capacity assuming we never read beyond small header.
    Files::IMemStream input(buffer.data(), buffer.capacity());
    bsa.readHeader(input);

    // Backing storage for the expected entry's name; must outlive the comparison below.
    std::vector<char> namesBuffer = { 'a', '\0' };
    const BSAFile::FileStruct expected{
        .mFileSize = maxUInt32,
        .mOffset = maxUInt32,
        .mHash = BSAFile::Hash{ .mLow = 0xaaaabbbb, .mHigh = 0xccccdddd },
        .mNameOffset = 0,
        .mNameSize = 1,
        .mNamesBuffer = &namesBuffer,
    };

    EXPECT_THAT(bsa.getList(), ElementsAre(expected));
}
// One byte past the largest representable case: the absolute offset no longer fits into
// 32 bits, so parsing must fail and no entry may be kept.
TEST(BSAFileTest, shouldThrowExceptionOnTooBigAbsoluteOffset)
{
    constexpr std::uint32_t maxUInt32 = std::numeric_limits<std::uint32_t>::max();
    const std::string buffer = makeBsaBuffer(maxUInt32, maxUInt32 - 34 + 1);

    TestBSAFile bsa;
    // Use capacity assuming we never read beyond small header.
    Files::IMemStream input(buffer.data(), buffer.capacity());

    EXPECT_THROW(bsa.readHeader(input), std::runtime_error);
    EXPECT_THAT(bsa.getList(), IsEmpty());
}
}
}

View file

@ -0,0 +1,360 @@
#include "operators.hpp"
#include <components/bsa/compressedbsafile.hpp>
#include <components/testing/util.hpp>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <cstdint>
#include <filesystem>
#include <format>
#include <fstream>
#include <string>
namespace Bsa
{
namespace
{
using namespace ::testing;
/// Description of one file entry of a compressed BSA test archive.
/// writeArchive() emits mHash, mSize and mOffset as a raw record inside the owning folder block
/// and the zero-terminated mName later in the file names section.
struct FileRecord
{
// 64-bit hash written as the first field of the record.
std::uint64_t mHash;
// Size field of the record.
std::uint32_t mSize;
// Offset field of the record.
std::uint32_t mOffset;
// File name without the terminating zero; the terminator is added on write.
std::string mName;
};
/// Description of one folder of a compressed BSA test archive.
/// writeArchive() emits mHash, mCount and mOffset as a raw folder record, then a folder block
/// made of a length-prefixed zero-terminated mName followed by the records of mFiles.
struct NonSSEFolderRecord
{
// 64-bit hash written as the first field of the folder record.
std::uint64_t mHash;
// Count field of the folder record; the tests set it to the number of entries in mFiles.
std::uint32_t mCount;
// Offset field of the folder record.
std::int32_t mOffset;
// Folder name without the terminating zero; length prefix and terminator are added on write.
std::string mName;
// Files belonging to this folder.
std::vector<FileRecord> mFiles;
};
/// In-memory description of a compressed BSA test archive, serialized by writeArchive().
struct Archive
{
// Header written as raw bytes at the beginning of the stream.
CompressedBSAFile::Header mHeader;
// Folders (with their files) written after the header.
std::vector<NonSSEFolderRecord> mFolders;
};
/// Serializes a compressed BSA test archive in three passes over the folders:
/// 1. the header followed by one (hash, count, offset) record per folder;
/// 2. one block per folder: length-prefixed zero-terminated name, then its file records;
/// 3. all file names, zero-terminated, in folder order.
/// No file contents are written.
void writeArchive(const Archive& value, std::ostream& stream)
{
    // Writes the object representation of a single field.
    const auto writeValue = [&stream](const auto& field) {
        stream.write(reinterpret_cast<const char*>(&field), sizeof(field));
    };
    // Writes the characters of a string followed by a terminating zero.
    const auto writeZeroTerminated = [&stream](const std::string& text) {
        stream.write(text.data(), text.size());
        stream.put('\0');
    };

    writeValue(value.mHeader);

    for (const NonSSEFolderRecord& folder : value.mFolders)
    {
        writeValue(folder.mHash);
        writeValue(folder.mCount);
        writeValue(folder.mOffset);
    }

    for (const NonSSEFolderRecord& folder : value.mFolders)
    {
        // The one-byte length prefix counts the terminating zero as well.
        const auto nameSize = static_cast<std::uint8_t>(folder.mName.size() + 1);
        writeValue(nameSize);
        writeZeroTerminated(folder.mName);

        for (const FileRecord& file : folder.mFiles)
        {
            writeValue(file.mHash);
            writeValue(file.mSize);
            writeValue(file.mOffset);
        }
    }

    for (const NonSSEFolderRecord& folder : value.mFolders)
        for (const FileRecord& file : folder.mFiles)
            writeZeroTerminated(file.mName);
}
/// Builds an output path named "<suite>.<test>.bsa" from the currently running test, so every
/// test case writes to its own file.
std::filesystem::path makeOutputPath()
{
    const auto* const info = UnitTest::GetInstance()->current_test_info();
    const auto fileName = std::format("{}.{}.bsa", info->test_suite_name(), info->name());
    return TestingOpenMW::outputFilePath(fileName);
}
// A zero-length file must be rejected and leave the file list empty.
TEST(CompressedBSAFileTest, shouldHandleEmpty)
{
    const std::filesystem::path archivePath = makeOutputPath();

    // Create the file without writing anything into it.
    {
        std::ofstream output;
        output.exceptions(std::ios_base::failbit | std::ios_base::badbit);
        output.open(archivePath, std::ios_base::binary);
    }

    CompressedBSAFile bsa;
    EXPECT_THROW(bsa.open(archivePath), std::runtime_error);
    EXPECT_THAT(bsa.getList(), IsEmpty());
}
// One folder with one file: the entry is listed under the combined name "folder\filename".
TEST(CompressedBSAFileTest, shouldHandleSingleFile)
{
    const std::filesystem::path archivePath = makeOutputPath();

    {
        std::ofstream output;
        output.exceptions(std::ios_base::failbit | std::ios_base::badbit);
        output.open(archivePath, std::ios_base::binary);

        const Archive content{
            .mHeader = CompressedBSAFile::Header{
                .mFormat = static_cast<std::uint32_t>(BsaVersion::Compressed),
                .mVersion = CompressedBSAFile::Version_TES4,
                .mFoldersOffset = sizeof(CompressedBSAFile::Header),
                .mFlags = CompressedBSAFile::ArchiveFlag_FolderNames | CompressedBSAFile::ArchiveFlag_FileNames,
                .mFolderCount = 1,
                .mFileCount = 1,
                .mFolderNamesLength = 7, // "folder" + terminating zero
                .mFileNamesLength = 9, // "filename" + terminating zero
                .mFileFlags = 0,
            },
            .mFolders = {
                NonSSEFolderRecord{
                    .mHash = 0xfedcba9876543210,
                    .mCount = 1,
                    .mOffset = 0,
                    .mName = "folder",
                    .mFiles = {
                        FileRecord{
                            .mHash = 0xfedcba9876543210,
                            .mSize = 42,
                            .mOffset = 0,
                            .mName = "filename",
                        },
                    },
                },
            },
        };
        writeArchive(content, output);
    }

    CompressedBSAFile bsa;
    bsa.open(archivePath);

    // Backing storage for the expected entry's name; must outlive the comparison below.
    constexpr std::string_view filePath = "folder\\filename";
    std::vector<char> namesBuffer(filePath.begin(), filePath.end());
    namesBuffer.push_back('\0');

    const BSAFile::FileStruct expected{
        .mFileSize = 42,
        .mOffset = 0,
        .mHash = BSAFile::Hash{ .mLow = 0, .mHigh = 0 },
        .mNameOffset = 0,
        .mNameSize = 15,
        .mNamesBuffer = &namesBuffer,
    };

    EXPECT_THAT(bsa.getList(), ElementsAre(expected));
}
// A file record whose name is empty must make opening the archive fail.
TEST(CompressedBSAFileTest, shouldHandleEmptyFileName)
{
    const std::filesystem::path archivePath = makeOutputPath();

    {
        std::ofstream output;
        output.exceptions(std::ios_base::failbit | std::ios_base::badbit);
        output.open(archivePath, std::ios_base::binary);

        const Archive content{
            .mHeader = CompressedBSAFile::Header{
                .mFormat = static_cast<std::uint32_t>(BsaVersion::Compressed),
                .mVersion = CompressedBSAFile::Version_TES4,
                .mFoldersOffset = sizeof(CompressedBSAFile::Header),
                .mFlags = CompressedBSAFile::ArchiveFlag_FolderNames | CompressedBSAFile::ArchiveFlag_FileNames,
                .mFolderCount = 1,
                .mFileCount = 1,
                .mFolderNamesLength = 7, // "folder" + terminating zero
                .mFileNamesLength = 1, // only the terminating zero of the empty name
                .mFileFlags = 0,
            },
            .mFolders = {
                NonSSEFolderRecord{
                    .mHash = 0xfedcba9876543210,
                    .mCount = 1,
                    .mOffset = 0,
                    .mName = "folder",
                    .mFiles = {
                        FileRecord{
                            .mHash = 0xfedcba9876543210,
                            .mSize = 42,
                            .mOffset = 0,
                            .mName = "",
                        },
                    },
                },
            },
        };
        writeArchive(content, output);
    }

    CompressedBSAFile bsa;
    EXPECT_THROW(bsa.open(archivePath), std::runtime_error);
}
// Two folders sharing one hash, each holding the same file record: the archive still opens
// and the list holds a single entry, named after the second folder.
TEST(CompressedBSAFileTest, shouldHandleFoldersWithDuplicateHash)
{
    const std::filesystem::path archivePath = makeOutputPath();

    {
        std::ofstream output;
        output.exceptions(std::ios_base::failbit | std::ios_base::badbit);
        output.open(archivePath, std::ios_base::binary);

        const FileRecord sharedFile{
            .mHash = 0xfedcba9876543210,
            .mSize = 42,
            .mOffset = 0,
            .mName = "filename",
        };

        const Archive content{
            .mHeader = CompressedBSAFile::Header{
                .mFormat = static_cast<std::uint32_t>(BsaVersion::Compressed),
                .mVersion = CompressedBSAFile::Version_TES4,
                .mFoldersOffset = sizeof(CompressedBSAFile::Header),
                .mFlags = CompressedBSAFile::ArchiveFlag_FolderNames | CompressedBSAFile::ArchiveFlag_FileNames,
                .mFolderCount = 2,
                .mFileCount = 2,
                .mFolderNamesLength = 16, // "folder1" and "folder2", each + terminating zero
                .mFileNamesLength = 18, // "filename" twice, each + terminating zero
                .mFileFlags = 0,
            },
            .mFolders = {
                NonSSEFolderRecord{
                    .mHash = 0xfedcba9876543210,
                    .mCount = 1,
                    .mOffset = 0,
                    .mName = "folder1",
                    .mFiles = { sharedFile },
                },
                NonSSEFolderRecord{
                    .mHash = 0xfedcba9876543210,
                    .mCount = 1,
                    .mOffset = 0,
                    .mName = "folder2",
                    .mFiles = { sharedFile },
                },
            },
        };
        writeArchive(content, output);
    }

    CompressedBSAFile bsa;
    bsa.open(archivePath);

    // Backing storage for the expected entry's name; must outlive the comparison below.
    constexpr std::string_view filePath = "folder2\\filename";
    std::vector<char> namesBuffer(filePath.begin(), filePath.end());
    namesBuffer.push_back('\0');

    const BSAFile::FileStruct expected{
        .mFileSize = 42,
        .mOffset = 0,
        .mHash = BSAFile::Hash{ .mLow = 0, .mHigh = 0 },
        .mNameOffset = 0,
        .mNameSize = 16,
        .mNamesBuffer = &namesBuffer,
    };

    EXPECT_THAT(bsa.getList(), ElementsAre(expected));
}
// Two files sharing one hash inside a single folder: the archive still opens and the list
// holds a single entry, carrying the size and name of the second file.
TEST(CompressedBSAFileTest, shouldHandleFilesWithDuplicateHash)
{
    const std::filesystem::path archivePath = makeOutputPath();

    {
        std::ofstream output;
        output.exceptions(std::ios_base::failbit | std::ios_base::badbit);
        output.open(archivePath, std::ios_base::binary);

        const Archive content{
            .mHeader = CompressedBSAFile::Header{
                .mFormat = static_cast<std::uint32_t>(BsaVersion::Compressed),
                .mVersion = CompressedBSAFile::Version_TES4,
                .mFoldersOffset = sizeof(CompressedBSAFile::Header),
                .mFlags = CompressedBSAFile::ArchiveFlag_FolderNames | CompressedBSAFile::ArchiveFlag_FileNames,
                .mFolderCount = 1,
                .mFileCount = 2,
                // NOTE(review): following the convention of the sibling tests, "folder" plus its
                // terminating zero is 7 bytes and the two "filenameN" entries are 20 bytes, yet
                // the header declares 9 and 18. Confirm whether this mismatch is intentional.
                .mFolderNamesLength = 9,
                .mFileNamesLength = 18,
                .mFileFlags = 0,
            },
            .mFolders = {
                NonSSEFolderRecord{
                    .mHash = 0xfedcba9876543210,
                    .mCount = 2,
                    .mOffset = 0,
                    .mName = "folder",
                    .mFiles = {
                        FileRecord{
                            .mHash = 0xfedcba9876543210,
                            .mSize = 42,
                            .mOffset = 0,
                            .mName = "filename1",
                        },
                        FileRecord{
                            .mHash = 0xfedcba9876543210,
                            .mSize = 13,
                            .mOffset = 0,
                            .mName = "filename2",
                        },
                    },
                },
            },
        };
        writeArchive(content, output);
    }

    CompressedBSAFile bsa;
    bsa.open(archivePath);

    // Backing storage for the expected entry's name; must outlive the comparison below.
    constexpr std::string_view filePath = "folder\\filename2";
    std::vector<char> namesBuffer(filePath.begin(), filePath.end());
    namesBuffer.push_back('\0');

    const BSAFile::FileStruct expected{
        .mFileSize = 13,
        .mOffset = 0,
        .mHash = BSAFile::Hash{ .mLow = 0, .mHigh = 0 },
        .mNameOffset = 0,
        .mNameSize = 16,
        .mNamesBuffer = &namesBuffer,
    };

    EXPECT_THAT(bsa.getList(), ElementsAre(expected));
}
}
}

View file

@ -321,7 +321,7 @@ ENDIF()
add_component_dir (files
linuxpath androidpath windowspath macospath fixedpath multidircollection collections configurationmanager
constrainedfilestream memorystream hash configfileparser openfile constrainedfilestreambuf conversion
istreamptr streamwithbuffer
istreamptr streamwithbuffer utils
)
if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" AND NOT CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC")

View file

@ -4,13 +4,15 @@
#include <cassert>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <format>
#include <istream>
#include <zlib.h>
#include <components/esm/fourcc.hpp>
#include <components/files/constrainedfilestream.hpp>
#include <components/files/conversion.hpp>
#include <components/files/utils.hpp>
#include <components/misc/strings/lower.hpp>
#include "ba2file.hpp"
@ -73,19 +75,11 @@ namespace Bsa
}
/// Read header information from the input source
void BA2DX10File::readHeader()
void BA2DX10File::readHeader(std::istream& input)
{
assert(!mIsLoaded);
std::ifstream input(mFilepath, std::ios_base::binary);
// Total archive size
std::streamoff fsize = 0;
if (input.seekg(0, std::ios_base::end))
{
fsize = input.tellg();
input.seekg(0);
}
const std::streamsize fsize = Files::getStreamSizeLeft(input);
if (fsize < 24) // header is 24 bytes
fail("File too small to be a valid BSA archive");
@ -135,23 +129,22 @@ namespace Bsa
std::vector<char> fileName;
uint16_t fileNameSize;
input.read(reinterpret_cast<char*>(&fileNameSize), sizeof(uint16_t));
fileName.resize(fileNameSize);
input.read(fileName.data(), fileName.size());
fileName.push_back('\0');
fileName.resize(fileNameSize + 1);
input.read(fileName.data(), fileNameSize);
mFileNames.push_back(std::move(fileName));
mFiles[i].setNameInfos(0, &mFileNames.back());
mFiles[i].mNameOffset = 0;
mFiles[i].mNameSize = fileNameSize;
mFiles[i].mNamesBuffer = &mFileNames.back();
}
mIsLoaded = true;
}
std::optional<BA2DX10File::FileRecord> BA2DX10File::getFileRecord(const std::string& str) const
std::optional<BA2DX10File::FileRecord> BA2DX10File::getFileRecord(std::string_view str) const
{
for (const auto c : str)
{
if (((static_cast<unsigned>(c) >> 7U) & 1U) != 0U)
{
fail("File record " + str + " contains unicode characters, refusing to load.");
fail(std::format("File record {} contains unicode characters, refusing to load.", str));
}
}
@ -161,7 +154,7 @@ namespace Bsa
// Force-convert the path into something UNIX can handle first
// to make sure std::filesystem::path doesn't think the entire path is the filename on Linux
// and subsequently purge it to determine the file folder.
std::string path = str;
std::string path(str);
std::replace(path.begin(), path.end(), '\\', '/');
#endif

View file

@ -41,7 +41,7 @@ namespace Bsa
std::list<std::vector<char>> mFileNames;
std::optional<FileRecord> getFileRecord(const std::string& str) const;
std::optional<FileRecord> getFileRecord(std::string_view str) const;
Files::IStreamPtr getFile(const FileRecord& fileRecord);
@ -57,7 +57,7 @@ namespace Bsa
virtual ~BA2DX10File();
/// Read header information from the input source
void readHeader() override;
void readHeader(std::istream& stream) override;
Files::IStreamPtr getFile(const char* filePath);
Files::IStreamPtr getFile(const FileStruct* fileStruct);

View file

@ -3,6 +3,7 @@
#include <algorithm>
#include <cassert>
#include <filesystem>
#include <format>
#include <fstream>
#include <zlib.h>
@ -10,6 +11,7 @@
#include <components/esm/fourcc.hpp>
#include <components/files/constrainedfilestream.hpp>
#include <components/files/conversion.hpp>
#include <components/files/utils.hpp>
#include <components/misc/strings/lower.hpp>
#include "ba2file.hpp"
@ -61,26 +63,18 @@ namespace Bsa
mFolders[dirHash][{ nameHash, extHash }] = file;
FileStruct fileStruct{};
fileStruct.fileSize = file.size;
fileStruct.offset = file.offset;
fileStruct.mFileSize = file.size;
fileStruct.mOffset = file.offset;
mFiles.push_back(fileStruct);
}
}
/// Read header information from the input source
void BA2GNRLFile::readHeader()
void BA2GNRLFile::readHeader(std::istream& input)
{
assert(!mIsLoaded);
std::ifstream input(mFilepath, std::ios_base::binary);
// Total archive size
std::streamoff fsize = 0;
if (input.seekg(0, std::ios_base::end))
{
fsize = input.tellg();
input.seekg(0);
}
const std::streamsize fsize = Files::getStreamSizeLeft(input);
if (fsize < 24) // header is 24 bytes
fail("File too small to be a valid BSA archive");
@ -126,23 +120,22 @@ namespace Bsa
std::vector<char> fileName;
uint16_t fileNameSize;
input.read(reinterpret_cast<char*>(&fileNameSize), sizeof(uint16_t));
fileName.resize(fileNameSize);
input.read(fileName.data(), fileName.size());
fileName.push_back('\0');
fileName.resize(fileNameSize + 1);
input.read(fileName.data(), fileNameSize);
mFileNames.push_back(std::move(fileName));
mFiles[i].setNameInfos(0, &mFileNames.back());
mFiles[i].mNameOffset = 0;
mFiles[i].mNameSize = fileNameSize;
mFiles[i].mNamesBuffer = &mFileNames.back();
}
mIsLoaded = true;
}
BA2GNRLFile::FileRecord BA2GNRLFile::getFileRecord(const std::string& str) const
BA2GNRLFile::FileRecord BA2GNRLFile::getFileRecord(std::string_view str) const
{
for (const auto c : str)
{
if (((static_cast<unsigned>(c) >> 7U) & 1U) != 0U)
{
fail("File record " + str + " contains unicode characters, refusing to load.");
fail(std::format("File record {} contains unicode characters, refusing to load.", str));
}
}
@ -152,7 +145,7 @@ namespace Bsa
// Force-convert the path into something UNIX can handle first
// to make sure std::filesystem::path doesn't think the entire path is the filename on Linux
// and subsequently purge it to determine the file folder.
std::string path = str;
std::string path(str);
std::replace(path.begin(), path.end(), '\\', '/');
#endif

View file

@ -29,7 +29,7 @@ namespace Bsa
std::list<std::vector<char>> mFileNames;
FileRecord getFileRecord(const std::string& str) const;
FileRecord getFileRecord(std::string_view str) const;
Files::IStreamPtr getFile(const FileRecord& fileRecord);
@ -45,7 +45,7 @@ namespace Bsa
virtual ~BA2GNRLFile();
/// Read header information from the input source
void readHeader() override;
void readHeader(std::istream& input) override;
Files::IStreamPtr getFile(const char* filePath);
Files::IStreamPtr getFile(const FileStruct* fileStruct);

View file

@ -25,12 +25,17 @@
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <cstring>
#include <filesystem>
#include <format>
#include <fstream>
#include <istream>
#include <system_error>
#include <components/esm/fourcc.hpp>
#include <components/files/constrainedfilestream.hpp>
#include <components/files/utils.hpp>
using namespace Bsa;
@ -54,7 +59,7 @@ BSAFile::Hash getHash(const std::string& name)
sum ^= (((unsigned)(name[i])) << (off & 0x1F));
off += 8;
}
hash.low = sum;
hash.mLow = sum;
for (sum = off = 0; i < name.size(); i++)
{
@ -64,12 +69,12 @@ BSAFile::Hash getHash(const std::string& name)
sum = (sum << (32 - n)) | (sum >> n); // binary "rotate right"
off += 8;
}
hash.high = sum;
hash.mHigh = sum;
return hash;
}
/// Read header information from the input source
void BSAFile::readHeader()
void BSAFile::readHeader(std::istream& input)
{
/*
* The layout of a BSA archive is as follows:
@ -103,27 +108,24 @@ void BSAFile::readHeader()
*/
assert(!mIsLoaded);
std::ifstream input(mFilepath, std::ios_base::binary);
// Total archive size
std::streamoff fsize = 0;
if (input.seekg(0, std::ios_base::end))
{
fsize = input.tellg();
input.seekg(0);
}
const std::streamsize fsize = Files::getStreamSizeLeft(input);
if (fsize < 12)
fail("File too small to be a valid BSA archive");
// Get essential header numbers
size_t dirsize, filenum;
std::streamsize dirsize;
std::streamsize filenum;
{
// First 12 bytes
uint32_t head[3];
input.read(reinterpret_cast<char*>(head), 12);
if (input.fail())
fail(std::format("Failed to read head: {}", std::generic_category().message(errno)));
if (head[0] != 0x100)
fail("Unrecognized BSA header");
@ -138,62 +140,83 @@ void BSAFile::readHeader()
// Each file must take up at least 21 bytes of data in the bsa. So
// if files*21 overflows the file size then we are guaranteed that
// the archive is corrupt.
if ((filenum * 21 > unsigned(fsize - 12)) || (dirsize + 8 * filenum > unsigned(fsize - 12)))
if (filenum * 21 > fsize - 12 || dirsize + 8 * filenum > fsize - 12)
fail("Directory information larger than entire archive");
// Read the offset info into a temporary buffer
std::vector<uint32_t> offsets(3 * filenum);
input.read(reinterpret_cast<char*>(offsets.data()), 12 * filenum);
if (input.fail())
fail(std::format("Failed to read offsets: {}", std::generic_category().message(errno)));
// Read the string table
mStringBuf.resize(dirsize - 12 * filenum);
input.read(mStringBuf.data(), mStringBuf.size());
if (input.fail())
fail(std::format("Failed to read string table: {}", std::generic_category().message(errno)));
// Check our position
assert(input.tellg() == std::streampos(12 + dirsize));
std::vector<Hash> hashes(filenum);
static_assert(sizeof(Hash) == 8);
input.read(reinterpret_cast<char*>(hashes.data()), 8 * filenum);
if (input.fail())
fail(std::format("Failed to read hashes: {}", std::generic_category().message(errno)));
// Calculate the offset of the data buffer. All file offsets are
// relative to this. 12 header bytes + directory + hash table
// (skipped)
size_t fileDataOffset = 12 + dirsize + 8 * filenum;
const std::streamsize fileDataOffset = 12 + dirsize + 8 * filenum;
// Set up the FileStruct table
mFiles.resize(filenum);
mFiles.reserve(filenum);
size_t endOfNameBuffer = 0;
for (size_t i = 0; i < filenum; i++)
for (std::streamsize i = 0; i < filenum; i++)
{
FileStruct& fs = mFiles[i];
fs.fileSize = offsets[i * 2];
fs.offset = static_cast<uint32_t>(offsets[i * 2 + 1] + fileDataOffset);
auto namesOffset = offsets[2 * filenum + i];
fs.setNameInfos(namesOffset, &mStringBuf);
fs.hash = hashes[i];
const uint32_t fileSize = offsets[i * 2];
const std::streamsize offset = static_cast<std::streamsize>(offsets[i * 2 + 1]) + fileDataOffset;
if (namesOffset >= mStringBuf.size())
{
if (fileSize + offset > fsize)
fail(std::format("Archive contains offsets outside itself: {} + {} > {}", fileSize, offset, fsize));
if (offset > std::numeric_limits<uint32_t>::max())
fail(std::format(
"Absolute file {} offset is too large: {} > {}", i, offset, std::numeric_limits<uint32_t>::max()));
const uint32_t nameOffset = offsets[2 * filenum + i];
if (nameOffset >= mStringBuf.size())
fail("Archive contains names offset outside itself");
}
const void* end = std::memchr(fs.name(), '\0', mStringBuf.size() - namesOffset);
if (!end)
{
const char* const begin = mStringBuf.data() + nameOffset;
const char* const end = reinterpret_cast<const char*>(std::memchr(begin, '\0', mStringBuf.size() - nameOffset));
if (end == nullptr)
fail("Archive contains non-zero terminated string");
}
endOfNameBuffer = std::max(endOfNameBuffer, namesOffset + std::strlen(fs.name()) + 1);
const std::size_t nameSize = end - begin;
FileStruct fs;
fs.mFileSize = fileSize;
fs.mOffset = static_cast<uint32_t>(offset);
fs.mHash = hashes[i];
fs.mNameOffset = nameOffset;
fs.mNameSize = static_cast<uint32_t>(nameSize);
fs.mNamesBuffer = &mStringBuf;
mFiles.push_back(fs);
endOfNameBuffer = std::max(endOfNameBuffer, nameOffset + nameSize + 1);
assert(endOfNameBuffer <= mStringBuf.size());
if (fs.offset + fs.fileSize > fsize)
fail("Archive contains offsets outside itself");
}
mStringBuf.resize(endOfNameBuffer);
std::sort(mFiles.begin(), mFiles.end(),
[](const FileStruct& left, const FileStruct& right) { return left.offset < right.offset; });
mIsLoaded = true;
[](const FileStruct& left, const FileStruct& right) { return left.mOffset < right.mOffset; });
}
/// Write header information to the output sink
@ -203,7 +226,7 @@ void Bsa::BSAFile::writeHeader()
uint32_t head[3];
head[0] = 0x100;
auto fileDataOffset = mFiles.empty() ? 12 : mFiles.front().offset;
auto fileDataOffset = mFiles.empty() ? 12 : mFiles.front().mOffset;
head[1] = static_cast<uint32_t>(fileDataOffset - 12 - 8 * mFiles.size());
output.seekp(0, std::ios_base::end);
@ -213,7 +236,7 @@ void Bsa::BSAFile::writeHeader()
output.write(reinterpret_cast<char*>(head), 12);
std::sort(mFiles.begin(), mFiles.end(), [](const FileStruct& left, const FileStruct& right) {
return std::make_pair(left.hash.low, left.hash.high) < std::make_pair(right.hash.low, right.hash.high);
return std::make_pair(left.mHash.mLow, left.mHash.mHigh) < std::make_pair(right.mHash.mLow, right.mHash.mHigh);
});
size_t filenum = mFiles.size();
@ -222,10 +245,10 @@ void Bsa::BSAFile::writeHeader()
for (size_t i = 0; i < filenum; i++)
{
auto& f = mFiles[i];
offsets[i * 2] = f.fileSize;
offsets[i * 2 + 1] = f.offset - fileDataOffset;
offsets[2 * filenum + i] = f.namesOffset;
hashes[i] = f.hash;
offsets[i * 2] = f.mFileSize;
offsets[i * 2 + 1] = f.mOffset - fileDataOffset;
offsets[2 * filenum + i] = f.mNameOffset;
hashes[i] = f.mHash;
}
output.write(reinterpret_cast<char*>(offsets.data()), sizeof(uint32_t) * offsets.size());
output.write(reinterpret_cast<char*>(mStringBuf.data()), mStringBuf.size());
@ -241,7 +264,11 @@ void BSAFile::open(const std::filesystem::path& file)
mFilepath = file;
if (std::filesystem::exists(file))
readHeader();
{
std::ifstream input(mFilepath, std::ios_base::binary);
readHeader(input);
mIsLoaded = true;
}
else
{
{
@ -265,7 +292,7 @@ void Bsa::BSAFile::close()
Files::IStreamPtr Bsa::BSAFile::getFile(const FileStruct* file)
{
return Files::openConstrainedFileStream(mFilepath, file->offset, file->fileSize);
return Files::openConstrainedFileStream(mFilepath, file->mOffset, file->mFileSize);
}
void Bsa::BSAFile::addFile(const std::string& filename, std::istream& file)
@ -281,37 +308,41 @@ void Bsa::BSAFile::addFile(const std::string& filename, std::istream& file)
FileStruct newFile;
file.seekg(0, std::ios::end);
newFile.fileSize = static_cast<uint32_t>(file.tellg());
newFile.setNameInfos(mStringBuf.size(), &mStringBuf);
newFile.hash = getHash(filename);
newFile.mFileSize = static_cast<uint32_t>(file.tellg());
newFile.mHash = getHash(filename);
if (mFiles.empty())
newFile.offset = static_cast<uint32_t>(newStartOfDataBuffer);
newFile.mOffset = static_cast<uint32_t>(newStartOfDataBuffer);
else
{
std::vector<char> buffer;
while (mFiles.front().offset < newStartOfDataBuffer)
while (mFiles.front().mOffset < newStartOfDataBuffer)
{
FileStruct& firstFile = mFiles.front();
buffer.resize(firstFile.fileSize);
buffer.resize(firstFile.mFileSize);
stream.seekg(firstFile.offset, std::ios::beg);
stream.read(buffer.data(), firstFile.fileSize);
stream.seekg(firstFile.mOffset, std::ios::beg);
stream.read(buffer.data(), firstFile.mFileSize);
stream.seekp(0, std::ios::end);
firstFile.offset = static_cast<uint32_t>(stream.tellp());
firstFile.mOffset = static_cast<uint32_t>(stream.tellp());
stream.write(buffer.data(), firstFile.fileSize);
stream.write(buffer.data(), firstFile.mFileSize);
// ensure sort order is preserved
std::rotate(mFiles.begin(), mFiles.begin() + 1, mFiles.end());
}
stream.seekp(0, std::ios::end);
newFile.offset = static_cast<uint32_t>(stream.tellp());
newFile.mOffset = static_cast<uint32_t>(stream.tellp());
}
newFile.mNameOffset = mStringBuf.size();
newFile.mNameSize = filename.size();
newFile.mNamesBuffer = &mStringBuf;
mStringBuf.insert(mStringBuf.end(), filename.begin(), filename.end());
mStringBuf.push_back('\0');
mFiles.push_back(newFile);
mHasChanged = true;

View file

@ -26,6 +26,7 @@
#include <cstdint>
#include <filesystem>
#include <iosfwd>
#include <string>
#include <vector>
@ -54,30 +55,25 @@ namespace Bsa
#pragma pack(1)
struct Hash
{
uint32_t low, high;
uint32_t mLow;
uint32_t mHigh;
};
#pragma pack(pop)
/// Represents one file entry in the archive
struct FileStruct
{
void setNameInfos(size_t index, std::vector<char>* stringBuf)
{
namesOffset = static_cast<uint32_t>(index);
namesBuffer = stringBuf;
}
// File size and offset in file. We store the offset from the
// beginning of the file, not the offset into the data buffer
// (which is what is stored in the archive.)
uint32_t fileSize, offset;
Hash hash;
uint32_t mFileSize = 0;
uint32_t mOffset = 0;
Hash mHash{};
uint32_t mNameOffset = 0;
uint32_t mNameSize = 0;
std::vector<char>* mNamesBuffer = nullptr;
// Zero-terminated file name
const char* name() const { return &(*namesBuffer)[namesOffset]; }
uint32_t namesOffset = 0;
std::vector<char>* namesBuffer = nullptr;
std::string_view name() const { return std::string_view(mNamesBuffer->data() + mNameOffset, mNameSize); }
};
typedef std::vector<FileStruct> FileList;
@ -100,7 +96,7 @@ namespace Bsa
[[noreturn]] void fail(const std::string& msg) const;
/// Read header information from the input source
virtual void readHeader();
virtual void readHeader(std::istream& input);
virtual void writeHeader();
public:
@ -151,7 +147,6 @@ namespace Bsa
// checks version of BSA from file header
static BsaVersion detectVersion(const std::filesystem::path& filePath);
};
}
#endif

View file

@ -26,14 +26,18 @@
#include <algorithm>
#include <cassert>
#include <cerrno>
#include <filesystem>
#include <fstream>
#include <format>
#include <istream>
#include <system_error>
#include <lz4frame.h>
#include <zlib.h>
#include <components/files/constrainedfilestream.hpp>
#include <components/files/conversion.hpp>
#include <components/files/utils.hpp>
#include <components/misc/strings/lower.hpp>
#include "memorystream.hpp"
@ -41,19 +45,11 @@
namespace Bsa
{
/// Read header information from the input source
void CompressedBSAFile::readHeader()
void CompressedBSAFile::readHeader(std::istream& input)
{
assert(!mIsLoaded);
std::ifstream input(mFilepath, std::ios_base::binary);
// Total archive size
std::streamoff fsize = 0;
if (input.seekg(0, std::ios_base::end))
{
fsize = input.tellg();
input.seekg(0);
}
const std::streamsize fsize = Files::getStreamSizeLeft(input);
if (fsize < 36) // Header is 36 bytes
fail("File too small to be a valid BSA archive");
@ -69,8 +65,8 @@ namespace Bsa
mHeader.mFlags &= (~ArchiveFlag_EmbeddedNames);
input.seekg(mHeader.mFoldersOffset);
if (input.bad())
fail("Invalid compressed BSA folder record offset");
if (input.fail())
fail("Failed to read compressed BSA folder record offset: " + std::generic_category().message(errno));
struct FlatFolderRecord
{
@ -81,9 +77,12 @@ namespace Bsa
};
std::vector<std::pair<FlatFolderRecord, std::vector<FileRecord>>> folders;
folders.resize(mHeader.mFolderCount);
for (auto& [folder, filelist] : folders)
folders.reserve(mHeader.mFolderCount);
for (std::uint32_t i = 0; i < mHeader.mFolderCount; ++i)
{
FlatFolderRecord folder;
input.read(reinterpret_cast<char*>(&folder.mHash), 8);
input.read(reinterpret_cast<char*>(&folder.mCount), 4);
if (mHeader.mVersion == Version_SSE) // SSE
@ -96,10 +95,13 @@ namespace Bsa
{
input.read(reinterpret_cast<char*>(&folder.mOffset), 4);
}
}
if (input.bad())
fail("Failed to read compressed BSA folder records: input error");
if (input.fail())
fail(std::format(
"Failed to read compressed BSA folder record: {}", std::generic_category().message(errno)));
folders.emplace_back(std::move(folder), std::vector<FileRecord>());
}
// file record blocks
if ((mHeader.mFlags & ArchiveFlag_FolderNames) == 0)
@ -126,20 +128,29 @@ namespace Bsa
mHeader.mFolderNamesLength -= size;
}
filelist.resize(folder.mCount);
for (auto& file : filelist)
filelist.reserve(folder.mCount);
for (std::uint32_t i = 0; i < folder.mCount; ++i)
{
FileRecord file;
input.read(reinterpret_cast<char*>(&file.mHash), 8);
input.read(reinterpret_cast<char*>(&file.mSize), 4);
input.read(reinterpret_cast<char*>(&file.mOffset), 4);
if (input.fail())
fail(std::format("Failed to read compressed BSA folder file record: {}",
std::generic_category().message(errno)));
filelist.push_back(std::move(file));
}
}
if (mHeader.mFolderNamesLength != 0)
input.ignore(mHeader.mFolderNamesLength);
if (input.bad())
fail("Failed to read compressed BSA file records: input error");
if (input.fail())
fail(std::format("Failed to read compressed BSA file records: {}", std::generic_category().message(errno)));
if ((mHeader.mFlags & ArchiveFlag_FileNames) != 0)
{
@ -168,36 +179,42 @@ namespace Bsa
if (mHeader.mFileNamesLength != 0)
input.ignore(mHeader.mFileNamesLength);
if (input.bad())
fail("Failed to read compressed BSA filenames: input error");
if (input.fail())
fail(std::format("Failed to read compressed BSA filenames: {}", std::generic_category().message(errno)));
for (auto& [folder, filelist] : folders)
{
std::map<std::uint64_t, FileRecord> fileMap;
for (const auto& file : filelist)
for (auto& file : filelist)
fileMap[file.mHash] = std::move(file);
auto& folderMap = mFolders[folder.mHash];
folderMap = FolderRecord{ folder.mCount, folder.mOffset, std::move(fileMap) };
for (auto& [hash, fileRec] : folderMap.mFiles)
{
FileStruct fileStruct{};
fileStruct.fileSize = fileRec.mSize & (~FileSizeFlag_Compression);
fileStruct.offset = fileRec.mOffset;
fileStruct.setNameInfos(0, &fileRec.mName);
mFiles.emplace_back(fileStruct);
}
mFolders[folder.mHash] = FolderRecord{ folder.mCount, folder.mOffset, folder.mName, std::move(fileMap) };
}
mIsLoaded = true;
for (auto& [folderHash, folderRecord] : mFolders)
{
for (auto& [fileHash, fileRecord] : folderRecord.mFiles)
{
FileStruct fileStruct{};
fileStruct.mFileSize = fileRecord.mSize & (~FileSizeFlag_Compression);
fileStruct.mOffset = fileRecord.mOffset;
fileStruct.mNameOffset = 0;
fileStruct.mNameSize
= fileRecord.mName.empty() ? 0 : static_cast<uint32_t>(fileRecord.mName.size() - 1);
fileStruct.mNamesBuffer = &fileRecord.mName;
mFiles.push_back(fileStruct);
}
}
}
CompressedBSAFile::FileRecord CompressedBSAFile::getFileRecord(const std::string& str) const
CompressedBSAFile::FileRecord CompressedBSAFile::getFileRecord(std::string_view str) const
{
for (const auto c : str)
{
if (((static_cast<unsigned>(c) >> 7U) & 1U) != 0U)
{
fail("File record " + str + " contains unicode characters, refusing to load.");
fail(std::format("File record {} contains unicode characters, refusing to load.", str));
}
}
@ -207,7 +224,7 @@ namespace Bsa
// Force-convert the path into something UNIX can handle first
// to make sure std::filesystem::path doesn't think the entire path is the filename on Linux
// and subsequently purge it to determine the file folder.
std::string path = str;
std::string path(str);
std::replace(path.begin(), path.end(), '\\', '/');
#endif

View file

@ -36,7 +36,7 @@ namespace Bsa
{
class CompressedBSAFile : private BSAFile
{
private:
public:
enum ArchiveFlags
{
ArchiveFlag_FolderNames = 0x0001,
@ -89,8 +89,6 @@ namespace Bsa
std::uint32_t mFileFlags;
};
Header mHeader;
struct FileRecord
{
std::uint64_t mHash;
@ -103,12 +101,15 @@ namespace Bsa
{
std::uint32_t mCount;
std::int64_t mOffset;
std::string mName;
std::map<std::uint64_t, FileRecord> mFiles;
};
private:
Header mHeader;
std::map<std::uint64_t, FolderRecord> mFolders;
FileRecord getFileRecord(const std::string& str) const;
FileRecord getFileRecord(std::string_view str) const;
/// \brief Normalizes given filename or folder and generates format-compatible hash.
static std::uint64_t generateHash(const std::filesystem::path& stem, std::string extension);
@ -124,7 +125,7 @@ namespace Bsa
virtual ~CompressedBSAFile() = default;
/// Read header information from the input source
void readHeader() override;
void readHeader(std::istream& input) override;
Files::IStreamPtr getFile(const char* filePath);
Files::IStreamPtr getFile(const FileStruct* fileStruct);

View file

@ -0,0 +1,38 @@
#ifndef COMPONENTS_FILES_UTILS_H
#define COMPONENTS_FILES_UTILS_H
#include <cerrno>
#include <format>
#include <istream>
#include <stdexcept>
#include <system_error>
namespace Files
{
/// \brief Returns the number of bytes between the current get position of
/// \a stream and the end of the stream.
///
/// The get position is moved to the end to measure the size and is then moved
/// back to where it was on entry, so a successful call leaves the read position
/// unchanged.
///
/// \param stream Seekable input stream to measure.
/// \return Distance in bytes from the current get position to the end.
/// \throws std::runtime_error if the position cannot be queried or changed.
inline std::streamsize getStreamSizeLeft(std::istream& stream)
{
    // tellg() reports a failed position query through its return value; it does
    // not set failbit when the underlying buffer refuses the query, so the
    // returned position has to be checked in addition to the stream state.
    const std::istream::pos_type invalidPos(std::istream::off_type(-1));

    // Single place that turns a failed step into an exception.
    // NOTE(review): errno is reported as-is; the stream library is not guaranteed
    // to set it, so the text after the prefix is best-effort.
    const auto throwOnFailure = [&stream](bool failed, const char* prefix) {
        if (failed || stream.fail())
            throw std::runtime_error(prefix + std::generic_category().message(errno));
    };

    const auto begin = stream.tellg();
    throwOnFailure(begin == invalidPos, "Failed to get current file position: ");

    stream.seekg(0, std::ios_base::end);
    throwOnFailure(false, "Failed to seek end file position: ");

    const auto end = stream.tellg();
    throwOnFailure(end == invalidPos, "Failed to get current file position: ");

    // Put the read position back so the caller can continue reading from where it was.
    stream.seekg(begin);
    throwOnFailure(false, "Failed to seek original file position: ");

    return end - begin;
}
}
#endif