mirror of
				https://github.com/OpenMW/openmw.git
				synced 2025-10-25 15:56:37 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			499 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			499 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|   OpenMW - The completely unofficial reimplementation of Morrowind
 | |
|   Copyright (C) 2008-2010  Nicolay Korslund
 | |
|   Email: < korslund@gmail.com >
 | |
|   WWW: http://openmw.sourceforge.net/
 | |
| 
 | |
|   This file (compressedbsafile.cpp) is part of the OpenMW package.
 | |
| 
 | |
|   OpenMW is distributed as free software: you can redistribute it
 | |
|   and/or modify it under the terms of the GNU General Public License
 | |
|   version 3, as published by the Free Software Foundation.
 | |
| 
 | |
|   This program is distributed in the hope that it will be useful, but
 | |
|   WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|   General Public License for more details.
 | |
| 
 | |
|   You should have received a copy of the GNU General Public License
 | |
|   version 3 along with this program. If not, see
 | |
|   http://www.gnu.org/licenses/ .
 | |
| 
 | |
|   Compressed BSA stuff added by cc9cii 2018
 | |
| 
 | |
|  */
 | |
| #include "compressedbsafile.hpp"
 | |
| 
 | |
| #include <cassert>
 | |
| #include <filesystem>
 | |
| #include <fstream>
 | |
| 
 | |
| #include <lz4frame.h>
 | |
| 
 | |
| #include <boost/iostreams/copy.hpp>
 | |
| #include <boost/iostreams/filtering_streambuf.hpp>
 | |
| 
 | |
| #if defined(_MSC_VER)
 | |
| #pragma warning(push)
 | |
| #pragma warning(disable : 4706)
 | |
| #include <boost/iostreams/filter/zlib.hpp>
 | |
| #pragma warning(pop)
 | |
| #else
 | |
| #include <boost/iostreams/filter/zlib.hpp>
 | |
| #endif
 | |
| 
 | |
| #include <boost/iostreams/device/array.hpp>
 | |
| #include <components/bsa/memorystream.hpp>
 | |
| #include <components/files/constrainedfilestream.hpp>
 | |
| #include <components/files/conversion.hpp>
 | |
| #include <components/misc/strings/lower.hpp>
 | |
| 
 | |
| namespace Bsa
 | |
| {
 | |
|     // special marker for invalid records,
 | |
|     // equal to max uint32_t value
 | |
|     const uint32_t CompressedBSAFile::sInvalidOffset = std::numeric_limits<uint32_t>::max();
 | |
| 
 | |
|     // bit marking compression on file size
 | |
|     const uint32_t CompressedBSAFile::sCompressedFlag = 1u << 30u;
 | |
| 
 | |
|     CompressedBSAFile::FileRecord::FileRecord()
 | |
|         : size(0)
 | |
|         , offset(sInvalidOffset)
 | |
|     {
 | |
|     }
 | |
| 
 | |
|     bool CompressedBSAFile::FileRecord::isValid() const
 | |
|     {
 | |
|         return offset != sInvalidOffset;
 | |
|     }
 | |
| 
 | |
|     bool CompressedBSAFile::FileRecord::isCompressed(bool bsaCompressedByDefault) const
 | |
|     {
 | |
|         bool recordCompressionFlagEnabled = ((size & sCompressedFlag) == sCompressedFlag);
 | |
| 
 | |
|         // record is compressed when:
 | |
|         //- bsaCompressedByDefault flag is set and 30th bit is NOT set, or
 | |
|         //- bsaCompressedByDefault flag is NOT set and 30th bit is set
 | |
|         // record is NOT compressed when:
 | |
|         //- bsaCompressedByDefault flag is NOT set and 30th bit is NOT set, or
 | |
|         //- bsaCompressedByDefault flag is set and 30th bit is set
 | |
|         return (bsaCompressedByDefault != recordCompressionFlagEnabled);
 | |
|     }
 | |
| 
 | |
|     std::uint32_t CompressedBSAFile::FileRecord::getSizeWithoutCompressionFlag() const
 | |
|     {
 | |
|         return size & (~sCompressedFlag);
 | |
|     }
 | |
| 
 | |
|     void CompressedBSAFile::getBZString(std::string& str, std::istream& filestream)
 | |
|     {
 | |
|         char size = 0;
 | |
|         filestream.read(&size, 1);
 | |
| 
 | |
|         auto buf = std::vector<char>(size);
 | |
|         filestream.read(buf.data(), size);
 | |
| 
 | |
|         if (buf[size - 1] != 0)
 | |
|         {
 | |
|             str.assign(buf.data(), size);
 | |
|             if (str.size() != ((size_t)size))
 | |
|             {
 | |
|                 fail("getBZString string size mismatch");
 | |
|             }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             str.assign(buf.data(), size - 1); // don't copy null terminator
 | |
|             if (str.size() != ((size_t)size - 1))
 | |
|             {
 | |
|                 fail("getBZString string size mismatch (null terminator)");
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     CompressedBSAFile::CompressedBSAFile()
 | |
|         : mCompressedByDefault(false)
 | |
|         , mEmbeddedFileNames(false)
 | |
|     {
 | |
|     }
 | |
| 
 | |
|     CompressedBSAFile::~CompressedBSAFile() = default;
 | |
| 
 | |
|     /// Read header information from the input source
 | |
|     void CompressedBSAFile::readHeader()
 | |
|     {
 | |
|         assert(!mIsLoaded);
 | |
| 
 | |
|         std::ifstream input(mFilepath, std::ios_base::binary);
 | |
| 
 | |
|         // Total archive size
 | |
|         std::streamoff fsize = 0;
 | |
|         if (input.seekg(0, std::ios_base::end))
 | |
|         {
 | |
|             fsize = input.tellg();
 | |
|             input.seekg(0);
 | |
|         }
 | |
| 
 | |
|         if (fsize < 36) // header is 36 bytes
 | |
|             fail("File too small to be a valid BSA archive");
 | |
| 
 | |
|         // Get essential header numbers
 | |
|         // size_t dirsize, filenum;
 | |
|         std::uint32_t archiveFlags, folderCount, totalFileNameLength;
 | |
|         {
 | |
|             // First 36 bytes
 | |
|             std::uint32_t header[9];
 | |
| 
 | |
|             input.read(reinterpret_cast<char*>(header), 36);
 | |
| 
 | |
|             if (header[0] != 0x00415342) /*"BSA\x00"*/
 | |
|                 fail("Unrecognized compressed BSA format");
 | |
|             mVersion = header[1];
 | |
|             if (mVersion != 0x67 /*TES4*/ && mVersion != 0x68 /*FO3, FNV, TES5*/ && mVersion != 0x69 /*SSE*/)
 | |
|                 fail("Unrecognized compressed BSA version");
 | |
| 
 | |
|             // header[2] is offset, should be 36 = 0x24 which is the size of the header
 | |
| 
 | |
|             // Oblivion - Meshes.bsa
 | |
|             //
 | |
|             // 0111 1000 0111 = 0x0787
 | |
|             //  ^^^ ^     ^^^
 | |
|             //  ||| |     ||+-- has names for dirs  (mandatory?)
 | |
|             //  ||| |     |+--- has names for files (mandatory?)
 | |
|             //  ||| |     +---- files are compressed by default
 | |
|             //  ||| |
 | |
|             //  ||| +---------- unknown (TES5: retain strings during startup)
 | |
|             //  ||+------------ unknown (TES5: embedded file names)
 | |
|             //  |+------------- unknown
 | |
|             //  +-------------- unknown
 | |
|             //
 | |
|             archiveFlags = header[3];
 | |
|             folderCount = header[4];
 | |
|             // header[5] - fileCount
 | |
|             // totalFolderNameLength = header[6];
 | |
|             totalFileNameLength = header[7];
 | |
|             // header[8]; // fileFlags : an opportunity to optimize here
 | |
| 
 | |
|             mCompressedByDefault = (archiveFlags & 0x4) != 0;
 | |
|             if (mVersion == 0x68 || mVersion == 0x69) /*FO3, FNV, TES5, SSE*/
 | |
|                 mEmbeddedFileNames = (archiveFlags & 0x100) != 0;
 | |
|         }
 | |
| 
 | |
|         // folder records
 | |
|         std::uint64_t hash;
 | |
|         FolderRecord fr;
 | |
|         for (std::uint32_t i = 0; i < folderCount; ++i)
 | |
|         {
 | |
|             input.read(reinterpret_cast<char*>(&hash), 8);
 | |
|             input.read(reinterpret_cast<char*>(&fr.count), 4); // not sure purpose of count
 | |
|             if (mVersion == 0x69) // SSE
 | |
|             {
 | |
|                 std::uint32_t unknown;
 | |
|                 input.read(reinterpret_cast<char*>(&unknown), 4);
 | |
|                 input.read(reinterpret_cast<char*>(&fr.offset), 8);
 | |
|             }
 | |
|             else
 | |
|                 input.read(reinterpret_cast<char*>(&fr.offset), 4); // not sure purpose of offset
 | |
| 
 | |
|             auto lb = mFolders.lower_bound(hash);
 | |
|             if (lb != mFolders.end() && !(mFolders.key_comp()(hash, lb->first)))
 | |
|                 fail("Archive found duplicate folder name hash");
 | |
|             else
 | |
|                 mFolders.insert(lb, std::pair<std::uint64_t, FolderRecord>(hash, fr));
 | |
|         }
 | |
| 
 | |
|         // file record blocks
 | |
|         std::uint64_t fileHash;
 | |
|         FileRecord file;
 | |
| 
 | |
|         std::string folder;
 | |
|         std::uint64_t folderHash;
 | |
|         if ((archiveFlags & 0x1) == 0)
 | |
|             folderCount = 1; // TODO: not tested - unit test necessary
 | |
| 
 | |
|         mFiles.clear();
 | |
|         std::vector<std::string> fullPaths;
 | |
| 
 | |
|         for (std::uint32_t i = 0; i < folderCount; ++i)
 | |
|         {
 | |
|             if ((archiveFlags & 0x1) != 0)
 | |
|                 getBZString(folder, input);
 | |
| 
 | |
|             folderHash = generateHash(folder, {});
 | |
| 
 | |
|             auto iter = mFolders.find(folderHash);
 | |
|             if (iter == mFolders.end())
 | |
|                 fail("Archive folder name hash not found");
 | |
| 
 | |
|             for (std::uint32_t j = 0; j < iter->second.count; ++j)
 | |
|             {
 | |
|                 input.read(reinterpret_cast<char*>(&fileHash), 8);
 | |
|                 input.read(reinterpret_cast<char*>(&file.size), 4);
 | |
|                 input.read(reinterpret_cast<char*>(&file.offset), 4);
 | |
| 
 | |
|                 auto lb = iter->second.files.lower_bound(fileHash);
 | |
|                 if (lb != iter->second.files.end() && !(iter->second.files.key_comp()(fileHash, lb->first)))
 | |
|                     fail("Archive found duplicate file name hash");
 | |
| 
 | |
|                 iter->second.files.insert(lb, std::pair<std::uint64_t, FileRecord>(fileHash, file));
 | |
| 
 | |
|                 FileStruct fileStruct{};
 | |
|                 fileStruct.fileSize = file.getSizeWithoutCompressionFlag();
 | |
|                 fileStruct.offset = file.offset;
 | |
|                 mFiles.push_back(fileStruct);
 | |
| 
 | |
|                 fullPaths.push_back(folder);
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         // file record blocks
 | |
|         if ((archiveFlags & 0x2) != 0)
 | |
|         {
 | |
|             mStringBuf.resize(totalFileNameLength);
 | |
|             input.read(mStringBuf.data(), mStringBuf.size()); // TODO: maybe useful in building a lookup map?
 | |
|         }
 | |
| 
 | |
|         size_t mStringBuffOffset = 0;
 | |
|         size_t totalStringsSize = 0;
 | |
|         for (std::uint32_t fileIndex = 0; fileIndex < mFiles.size(); ++fileIndex)
 | |
|         {
 | |
| 
 | |
|             if (mStringBuffOffset >= totalFileNameLength)
 | |
|             {
 | |
|                 fail("Corrupted names record in BSA file");
 | |
|             }
 | |
| 
 | |
|             // The vector guarantees that its elements occupy contiguous memory
 | |
|             mFiles[fileIndex].setNameInfos(mStringBuffOffset, &mStringBuf);
 | |
| 
 | |
|             fullPaths.at(fileIndex) += "\\" + std::string(mStringBuf.data() + mStringBuffOffset);
 | |
| 
 | |
|             while (mStringBuffOffset < totalFileNameLength)
 | |
|             {
 | |
|                 if (mStringBuf[mStringBuffOffset] != '\0')
 | |
|                 {
 | |
|                     mStringBuffOffset++;
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     mStringBuffOffset++;
 | |
|                     break;
 | |
|                 }
 | |
|             }
 | |
|             // we want to keep one more 0 character at the end of each string
 | |
|             totalStringsSize += fullPaths.at(fileIndex).length() + 1u;
 | |
|         }
 | |
|         mStringBuf.resize(totalStringsSize);
 | |
| 
 | |
|         mStringBuffOffset = 0;
 | |
|         for (std::uint32_t fileIndex = 0u; fileIndex < mFiles.size(); fileIndex++)
 | |
|         {
 | |
|             size_t stringLength = fullPaths.at(fileIndex).length();
 | |
| 
 | |
|             std::copy(fullPaths.at(fileIndex).c_str(),
 | |
|                 // plus 1 because we also want to copy 0 at the end of the string
 | |
|                 fullPaths.at(fileIndex).c_str() + stringLength + 1u, mStringBuf.data() + mStringBuffOffset);
 | |
| 
 | |
|             mFiles[fileIndex].setNameInfos(mStringBuffOffset, &mStringBuf);
 | |
| 
 | |
|             mStringBuffOffset += stringLength + 1u;
 | |
|         }
 | |
| 
 | |
|         if (mStringBuffOffset != mStringBuf.size())
 | |
|         {
 | |
|             fail("Could not resolve names of files in BSA file");
 | |
|         }
 | |
| 
 | |
|         convertCompressedSizesToUncompressed();
 | |
|         mIsLoaded = true;
 | |
|     }
 | |
| 
 | |
|     CompressedBSAFile::FileRecord CompressedBSAFile::getFileRecord(const std::string& str) const
 | |
|     {
 | |
|         for (const auto c : str)
 | |
|         {
 | |
|             if (((static_cast<unsigned>(c) >> 7U) & 1U) != 0U)
 | |
|             {
 | |
|                 fail("File record " + str + " contains unicode characters, refusing to load.");
 | |
|             }
 | |
|         }
 | |
| 
 | |
| #ifdef _WIN32
 | |
|         const auto& path = str;
 | |
| #else
 | |
|         // Force-convert the path into something UNIX can handle first
 | |
|         // to make sure std::filesystem::path doesn't think the entire path is the filename on Linux
 | |
|         // and subsequently purge it to determine the file folder.
 | |
|         std::string path = str;
 | |
|         std::replace(path.begin(), path.end(), '\\', '/');
 | |
| #endif
 | |
| 
 | |
|         const auto p = std::filesystem::path{ path }; // Purposefully damage Unicode strings.
 | |
|         const auto stem = p.stem();
 | |
|         const auto ext = p.extension().string(); // Purposefully damage Unicode strings.
 | |
| 
 | |
|         std::uint64_t folderHash = generateHash(p.parent_path(), {});
 | |
| 
 | |
|         auto it = mFolders.find(folderHash);
 | |
|         if (it == mFolders.end())
 | |
|             return FileRecord(); // folder not found, return default which has offset of sInvalidOffset
 | |
| 
 | |
|         std::uint64_t fileHash = generateHash(stem, ext);
 | |
|         auto iter = it->second.files.find(fileHash);
 | |
|         if (iter == it->second.files.end())
 | |
|             return FileRecord(); // file not found, return default which has offset of sInvalidOffset
 | |
| 
 | |
|         return iter->second;
 | |
|     }
 | |
| 
 | |
|     Files::IStreamPtr CompressedBSAFile::getFile(const FileStruct* file)
 | |
|     {
 | |
|         FileRecord fileRec = getFileRecord(file->name());
 | |
|         if (!fileRec.isValid())
 | |
|         {
 | |
|             fail("File not found: " + std::string(file->name()));
 | |
|         }
 | |
|         return getFile(fileRec);
 | |
|     }
 | |
| 
 | |
|     void CompressedBSAFile::addFile(const std::string& filename, std::istream& file)
 | |
|     {
 | |
|         assert(false); // not implemented yet
 | |
|         fail("Add file is not implemented for compressed BSA: " + filename);
 | |
|     }
 | |
| 
 | |
|     Files::IStreamPtr CompressedBSAFile::getFile(const char* file)
 | |
|     {
 | |
|         FileRecord fileRec = getFileRecord(file);
 | |
|         if (!fileRec.isValid())
 | |
|         {
 | |
|             fail("File not found: " + std::string(file));
 | |
|         }
 | |
|         return getFile(fileRec);
 | |
|     }
 | |
| 
 | |
|     Files::IStreamPtr CompressedBSAFile::getFile(const FileRecord& fileRecord)
 | |
|     {
 | |
|         size_t size = fileRecord.getSizeWithoutCompressionFlag();
 | |
|         size_t uncompressedSize = size;
 | |
|         bool compressed = fileRecord.isCompressed(mCompressedByDefault);
 | |
|         Files::IStreamPtr streamPtr = Files::openConstrainedFileStream(mFilepath, fileRecord.offset, size);
 | |
|         std::istream* fileStream = streamPtr.get();
 | |
|         if (mEmbeddedFileNames)
 | |
|         {
 | |
|             // Skip over the embedded file name
 | |
|             char length = 0;
 | |
|             fileStream->read(&length, 1);
 | |
|             fileStream->ignore(length);
 | |
|             size -= length + sizeof(char);
 | |
|         }
 | |
|         if (compressed)
 | |
|         {
 | |
|             fileStream->read(reinterpret_cast<char*>(&uncompressedSize), sizeof(uint32_t));
 | |
|             size -= sizeof(uint32_t);
 | |
|         }
 | |
|         auto memoryStreamPtr = std::make_unique<MemoryInputStream>(uncompressedSize);
 | |
| 
 | |
|         if (compressed)
 | |
|         {
 | |
|             if (mVersion != 0x69) // Non-SSE: zlib
 | |
|             {
 | |
|                 boost::iostreams::filtering_streambuf<boost::iostreams::input> inputStreamBuf;
 | |
|                 inputStreamBuf.push(boost::iostreams::zlib_decompressor());
 | |
|                 inputStreamBuf.push(*fileStream);
 | |
| 
 | |
|                 boost::iostreams::basic_array_sink<char> sr(memoryStreamPtr->getRawData(), uncompressedSize);
 | |
|                 boost::iostreams::copy(inputStreamBuf, sr);
 | |
|             }
 | |
|             else // SSE: lz4
 | |
|             {
 | |
|                 auto buffer = std::vector<char>(size);
 | |
|                 fileStream->read(buffer.data(), size);
 | |
|                 LZ4F_decompressionContext_t context = nullptr;
 | |
|                 LZ4F_createDecompressionContext(&context, LZ4F_VERSION);
 | |
|                 LZ4F_decompressOptions_t options = {};
 | |
|                 LZ4F_errorCode_t errorCode = LZ4F_decompress(
 | |
|                     context, memoryStreamPtr->getRawData(), &uncompressedSize, buffer.data(), &size, &options);
 | |
|                 if (LZ4F_isError(errorCode))
 | |
|                     fail("LZ4 decompression error (file " + Files::pathToUnicodeString(mFilepath)
 | |
|                         + "): " + LZ4F_getErrorName(errorCode));
 | |
|                 errorCode = LZ4F_freeDecompressionContext(context);
 | |
|                 if (LZ4F_isError(errorCode))
 | |
|                     fail("LZ4 decompression error (file " + Files::pathToUnicodeString(mFilepath)
 | |
|                         + "): " + LZ4F_getErrorName(errorCode));
 | |
|             }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             fileStream->read(memoryStreamPtr->getRawData(), size);
 | |
|         }
 | |
| 
 | |
|         return std::make_unique<Files::StreamWithBuffer<MemoryInputStream>>(std::move(memoryStreamPtr));
 | |
|     }
 | |
| 
 | |
|     // mFiles used by OpenMW expects uncompressed sizes
 | |
|     void CompressedBSAFile::convertCompressedSizesToUncompressed()
 | |
|     {
 | |
|         for (auto& mFile : mFiles)
 | |
|         {
 | |
|             const FileRecord& fileRecord = getFileRecord(mFile.name());
 | |
|             if (!fileRecord.isValid())
 | |
|             {
 | |
|                 fail("Could not find file " + std::string(mFile.name()) + " in BSA");
 | |
|             }
 | |
| 
 | |
|             if (!fileRecord.isCompressed(mCompressedByDefault))
 | |
|             {
 | |
|                 // no need to fix fileSize in mFiles - uncompressed size already set
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             Files::IStreamPtr dataBegin = Files::openConstrainedFileStream(
 | |
|                 mFilepath, fileRecord.offset, fileRecord.getSizeWithoutCompressionFlag());
 | |
| 
 | |
|             if (mEmbeddedFileNames)
 | |
|             {
 | |
|                 std::string embeddedFileName;
 | |
|                 getBZString(embeddedFileName, *(dataBegin.get()));
 | |
|             }
 | |
| 
 | |
|             dataBegin->read(reinterpret_cast<char*>(&(mFile.fileSize)), sizeof(mFile.fileSize));
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     std::uint64_t CompressedBSAFile::generateHash(const std::filesystem::path& stem, std::string extension)
 | |
|     {
 | |
|         auto str = stem.u8string();
 | |
|         size_t len = str.length();
 | |
|         if (len == 0)
 | |
|             return 0;
 | |
|         std::replace(str.begin(), str.end(), '/', '\\');
 | |
|         Misc::StringUtils::lowerCaseInPlace(str);
 | |
|         uint64_t result = str[len - 1] | (len >= 3 ? (str[len - 2] << 8) : 0) | (len << 16) | (str[0] << 24);
 | |
|         if (len >= 4)
 | |
|         {
 | |
|             uint32_t hash = 0;
 | |
|             for (size_t i = 1; i <= len - 3; ++i)
 | |
|                 hash = hash * 0x1003f + str[i];
 | |
|             result += static_cast<uint64_t>(hash) << 32;
 | |
|         }
 | |
|         if (extension.empty())
 | |
|             return result;
 | |
|         Misc::StringUtils::lowerCaseInPlace(extension);
 | |
|         if (extension == ".kf")
 | |
|             result |= 0x80;
 | |
|         else if (extension == ".nif")
 | |
|             result |= 0x8000;
 | |
|         else if (extension == ".dds")
 | |
|             result |= 0x8080;
 | |
|         else if (extension == ".wav")
 | |
|             result |= 0x80000000;
 | |
|         uint32_t hash = 0;
 | |
|         for (const auto& c : extension)
 | |
|             hash = hash * 0x1003f + c;
 | |
|         result += static_cast<uint64_t>(hash) << 32;
 | |
|         return result;
 | |
|     }
 | |
| 
 | |
| } // namespace Bsa
 |