mirror of
				https://github.com/OpenMW/openmw.git
				synced 2025-10-26 11:26:41 +00:00 
			
		
		
		
	We currently build a large map of a BSAFile's contents that OpenMW itself never uses, because archive contents are already mapped in the VFS. This PR removes the map from BSAFile and reimplements its only current use in BSATool.
		
			
				
	
	
		
			502 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			502 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|   OpenMW - The completely unofficial reimplementation of Morrowind
 | |
|   Copyright (C) 2008-2010  Nicolay Korslund
 | |
|   Email: < korslund@gmail.com >
 | |
|   WWW: http://openmw.sourceforge.net/
 | |
| 
 | |
|   This file (compressedbsafile.cpp) is part of the OpenMW package.
 | |
| 
 | |
|   OpenMW is distributed as free software: you can redistribute it
 | |
|   and/or modify it under the terms of the GNU General Public License
 | |
|   version 3, as published by the Free Software Foundation.
 | |
| 
 | |
|   This program is distributed in the hope that it will be useful, but
 | |
|   WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|   General Public License for more details.
 | |
| 
 | |
|   You should have received a copy of the GNU General Public License
 | |
|   version 3 along with this program. If not, see
 | |
|   http://www.gnu.org/licenses/ .
 | |
| 
 | |
|   Compressed BSA stuff added by cc9cii 2018
 | |
| 
 | |
|  */
 | |
| #include "compressedbsafile.hpp"
 | |
| 
 | |
| #include <stdexcept>
 | |
| #include <cassert>
 | |
| 
 | |
| #include <lz4frame.h>
 | |
| 
 | |
| #include <boost/scoped_array.hpp>
 | |
| #include <boost/filesystem/path.hpp>
 | |
| #include <boost/filesystem/fstream.hpp>
 | |
| 
 | |
| #include <boost/iostreams/filtering_streambuf.hpp>
 | |
| #include <boost/iostreams/copy.hpp>
 | |
| 
 | |
| #if defined(_MSC_VER)
 | |
|     #pragma warning (push)
 | |
|     #pragma warning (disable : 4706)
 | |
|     #include <boost/iostreams/filter/zlib.hpp>
 | |
|     #pragma warning (pop)
 | |
| #else
 | |
|     #include <boost/iostreams/filter/zlib.hpp>
 | |
| #endif
 | |
| 
 | |
| #include <boost/iostreams/device/array.hpp>
 | |
| #include <components/bsa/memorystream.hpp>
 | |
| #include <components/misc/stringops.hpp>
 | |
| 
 | |
| namespace Bsa
 | |
| {
 | |
| //special marker for invalid records,
 | |
| //equal to max uint32_t value
 | |
| const uint32_t CompressedBSAFile::sInvalidOffset = std::numeric_limits<uint32_t>::max();
 | |
| 
 | |
| //bit marking compression on file size
 | |
| const uint32_t CompressedBSAFile::sCompressedFlag = 1u << 30u;
 | |
| 
 | |
| 
 | |
| CompressedBSAFile::FileRecord::FileRecord() : size(0), offset(sInvalidOffset)
 | |
| { }
 | |
| 
 | |
| bool CompressedBSAFile::FileRecord::isValid() const
 | |
| {
 | |
|     return offset != sInvalidOffset;
 | |
| }
 | |
| 
 | |
| bool CompressedBSAFile::FileRecord::isCompressed(bool bsaCompressedByDefault) const
 | |
| {
 | |
|     bool recordCompressionFlagEnabled = ((size & sCompressedFlag) == sCompressedFlag);
 | |
| 
 | |
|     //record is compressed when:
 | |
|     //- bsaCompressedByDefault flag is set and 30th bit is NOT set, or
 | |
|     //- bsaCompressedByDefault flag is NOT set and 30th bit is set
 | |
|     //record is NOT compressed when:
 | |
|     //- bsaCompressedByDefault flag is NOT set and 30th bit is NOT set, or
 | |
|     //- bsaCompressedByDefault flag is set and 30th bit is set
 | |
|     return (bsaCompressedByDefault != recordCompressionFlagEnabled);
 | |
| }
 | |
| 
 | |
| std::uint32_t CompressedBSAFile::FileRecord::getSizeWithoutCompressionFlag() const {
 | |
|     return size & (~sCompressedFlag);
 | |
| }
 | |
| 
 | |
| void CompressedBSAFile::getBZString(std::string& str, std::istream& filestream)
 | |
| {
 | |
|     char size = 0;
 | |
|     filestream.read(&size, 1);
 | |
| 
 | |
|     boost::scoped_array<char> buf(new char[size]);
 | |
|     filestream.read(buf.get(), size);
 | |
| 
 | |
|     if (buf[size - 1] != 0)
 | |
|     {
 | |
|         str.assign(buf.get(), size);
 | |
|         if (str.size() != ((size_t)size)) {
 | |
|             fail("getBZString string size mismatch");
 | |
|         }
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         str.assign(buf.get(), size - 1); // don't copy null terminator
 | |
|         if (str.size() != ((size_t)size - 1)) {
 | |
|             fail("getBZString string size mismatch (null terminator)");
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| CompressedBSAFile::CompressedBSAFile()
 | |
|     : mCompressedByDefault(false), mEmbeddedFileNames(false)
 | |
| { }
 | |
| 
 | |
| CompressedBSAFile::~CompressedBSAFile() = default;
 | |
| 
 | |
| /// Read header information from the input source
 | |
| void CompressedBSAFile::readHeader()
 | |
| {
 | |
|     assert(!mIsLoaded);
 | |
| 
 | |
|     namespace bfs = boost::filesystem;
 | |
|     bfs::ifstream input(bfs::path(mFilename), std::ios_base::binary);
 | |
| 
 | |
|     // Total archive size
 | |
|     std::streamoff fsize = 0;
 | |
|     if(input.seekg(0, std::ios_base::end))
 | |
|     {
 | |
|         fsize = input.tellg();
 | |
|         input.seekg(0);
 | |
|     }
 | |
| 
 | |
|     if(fsize < 36) // header is 36 bytes
 | |
|         fail("File too small to be a valid BSA archive");
 | |
| 
 | |
|     // Get essential header numbers
 | |
|     //size_t dirsize, filenum;
 | |
|     std::uint32_t archiveFlags, folderCount, totalFileNameLength;
 | |
|     {
 | |
|         // First 36 bytes
 | |
|         std::uint32_t header[9];
 | |
| 
 | |
|         input.read(reinterpret_cast<char*>(header), 36);
 | |
| 
 | |
|         if (header[0] != 0x00415342) /*"BSA\x00"*/
 | |
|             fail("Unrecognized compressed BSA format");
 | |
|         mVersion = header[1];
 | |
|         if (mVersion != 0x67 /*TES4*/ && mVersion != 0x68 /*FO3, FNV, TES5*/ && mVersion != 0x69 /*SSE*/)
 | |
|             fail("Unrecognized compressed BSA version");
 | |
| 
 | |
|         // header[2] is offset, should be 36 = 0x24 which is the size of the header
 | |
| 
 | |
|         // Oblivion - Meshes.bsa
 | |
|         //
 | |
|         // 0111 1000 0111 = 0x0787
 | |
|         //  ^^^ ^     ^^^
 | |
|         //  ||| |     ||+-- has names for dirs  (mandatory?)
 | |
|         //  ||| |     |+--- has names for files (mandatory?)
 | |
|         //  ||| |     +---- files are compressed by default
 | |
|         //  ||| |
 | |
|         //  ||| +---------- unknown (TES5: retain strings during startup)
 | |
|         //  ||+------------ unknown (TES5: embedded file names)
 | |
|         //  |+------------- unknown
 | |
|         //  +-------------- unknown
 | |
|         //
 | |
|         archiveFlags          = header[3];
 | |
|         folderCount           = header[4];
 | |
|         // header[5] - fileCount
 | |
|         // totalFolderNameLength = header[6];
 | |
|         totalFileNameLength   = header[7];
 | |
|         // header[8]; // fileFlags : an opportunity to optimize here
 | |
| 
 | |
|         mCompressedByDefault = (archiveFlags & 0x4) != 0;
 | |
|         if (mVersion == 0x68 || mVersion == 0x69) /*FO3, FNV, TES5, SSE*/
 | |
|             mEmbeddedFileNames = (archiveFlags & 0x100) != 0;
 | |
|     }
 | |
| 
 | |
|     // folder records
 | |
|     std::uint64_t hash;
 | |
|     FolderRecord fr;
 | |
|     for (std::uint32_t i = 0; i < folderCount; ++i)
 | |
|     {
 | |
|         input.read(reinterpret_cast<char*>(&hash), 8);
 | |
|         input.read(reinterpret_cast<char*>(&fr.count), 4); // not sure purpose of count
 | |
|         if (mVersion == 0x69) // SSE
 | |
|         {
 | |
|             std::uint32_t unknown;
 | |
|             input.read(reinterpret_cast<char*>(&unknown), 4);
 | |
|             input.read(reinterpret_cast<char*>(&fr.offset), 8);
 | |
|         }
 | |
|         else
 | |
|             input.read(reinterpret_cast<char*>(&fr.offset), 4); // not sure purpose of offset
 | |
| 
 | |
|         auto lb = mFolders.lower_bound(hash);
 | |
|         if (lb != mFolders.end() && !(mFolders.key_comp()(hash, lb->first)))
 | |
|             fail("Archive found duplicate folder name hash");
 | |
|         else
 | |
|             mFolders.insert(lb, std::pair<std::uint64_t, FolderRecord>(hash, fr));
 | |
|     }
 | |
| 
 | |
|     // file record blocks
 | |
|     std::uint64_t fileHash;
 | |
|     FileRecord file;
 | |
| 
 | |
|     std::string folder;
 | |
|     std::uint64_t folderHash;
 | |
|     if ((archiveFlags & 0x1) == 0)
 | |
|         folderCount = 1; // TODO: not tested - unit test necessary
 | |
| 
 | |
|     mFiles.clear();
 | |
|     std::vector<std::string> fullPaths;
 | |
|     
 | |
|     for (std::uint32_t i = 0; i < folderCount; ++i)
 | |
|     {
 | |
|         if ((archiveFlags & 0x1) != 0)
 | |
|             getBZString(folder, input);
 | |
| 
 | |
|         folderHash = generateHash(folder, std::string());
 | |
| 
 | |
|         auto iter = mFolders.find(folderHash);
 | |
|         if (iter == mFolders.end())
 | |
|             fail("Archive folder name hash not found");
 | |
| 
 | |
|         for (std::uint32_t j = 0; j < iter->second.count; ++j)
 | |
|         {
 | |
|             input.read(reinterpret_cast<char*>(&fileHash), 8);
 | |
|             input.read(reinterpret_cast<char*>(&file.size), 4);
 | |
|             input.read(reinterpret_cast<char*>(&file.offset), 4);
 | |
| 
 | |
|             auto lb = iter->second.files.lower_bound(fileHash);
 | |
|             if (lb != iter->second.files.end() && !(iter->second.files.key_comp()(fileHash, lb->first)))
 | |
|                 fail("Archive found duplicate file name hash");
 | |
| 
 | |
|             iter->second.files.insert(lb, std::pair<std::uint64_t, FileRecord>(fileHash, file));
 | |
| 
 | |
|             FileStruct fileStruct{};
 | |
|             fileStruct.fileSize = file.getSizeWithoutCompressionFlag();
 | |
|             fileStruct.offset = file.offset;
 | |
|             mFiles.push_back(fileStruct);
 | |
| 
 | |
|             fullPaths.push_back(folder);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     // file record blocks
 | |
|     if ((archiveFlags & 0x2) != 0)
 | |
|     {
 | |
|         mStringBuf.resize(totalFileNameLength);
 | |
|         input.read(&mStringBuf[0], mStringBuf.size()); // TODO: maybe useful in building a lookup map?
 | |
|     }
 | |
| 
 | |
|     size_t mStringBuffOffset = 0;
 | |
|     size_t totalStringsSize = 0;
 | |
|     for (std::uint32_t fileIndex = 0; fileIndex < mFiles.size(); ++fileIndex) {
 | |
| 
 | |
|         if (mStringBuffOffset >= totalFileNameLength) {
 | |
|             fail("Corrupted names record in BSA file");
 | |
|         }
 | |
| 
 | |
|         //The vector guarantees that its elements occupy contiguous memory
 | |
|         mFiles[fileIndex].setNameInfos(mStringBuffOffset, &mStringBuf);
 | |
| 
 | |
|         fullPaths.at(fileIndex) += "\\" + std::string(mStringBuf.data() + mStringBuffOffset);
 | |
| 
 | |
|         while (mStringBuffOffset < totalFileNameLength) {
 | |
|             if (mStringBuf[mStringBuffOffset] != '\0') {
 | |
|                 mStringBuffOffset++;
 | |
|             }
 | |
|             else {
 | |
|                 mStringBuffOffset++;
 | |
|                 break;
 | |
|             }
 | |
|         }
 | |
|         //we want to keep one more 0 character at the end of each string
 | |
|         totalStringsSize += fullPaths.at(fileIndex).length() + 1u;
 | |
|     }
 | |
|     mStringBuf.resize(totalStringsSize);
 | |
| 
 | |
|     mStringBuffOffset = 0;
 | |
|     for (std::uint32_t fileIndex = 0u; fileIndex < mFiles.size(); fileIndex++) {
 | |
|         size_t stringLength = fullPaths.at(fileIndex).length();
 | |
| 
 | |
|         std::copy(fullPaths.at(fileIndex).c_str(),
 | |
|             //plus 1 because we also want to copy 0 at the end of the string
 | |
|             fullPaths.at(fileIndex).c_str() + stringLength + 1u,
 | |
|             mStringBuf.data() + mStringBuffOffset);
 | |
| 
 | |
|         mFiles[fileIndex].setNameInfos(mStringBuffOffset, &mStringBuf);
 | |
| 
 | |
|         mStringBuffOffset += stringLength + 1u;
 | |
|     }
 | |
| 
 | |
|     if (mStringBuffOffset != mStringBuf.size()) {
 | |
|         fail("Could not resolve names of files in BSA file");
 | |
|     }
 | |
| 
 | |
|     convertCompressedSizesToUncompressed();
 | |
|     mIsLoaded = true;
 | |
| }
 | |
| 
 | |
| CompressedBSAFile::FileRecord CompressedBSAFile::getFileRecord(const std::string& str) const
 | |
| {
 | |
|     // Force-convert the path into something both Windows and UNIX can handle first
 | |
|     // to make sure Boost doesn't think the entire path is the filename on Linux
 | |
|     // and subsequently purge it to determine the file folder.
 | |
|     std::string path = str;
 | |
|     std::replace(path.begin(), path.end(), '\\', '/');
 | |
| 
 | |
|     boost::filesystem::path p(path);
 | |
|     std::string stem = p.stem().string();
 | |
|     std::string ext = p.extension().string();
 | |
|     p.remove_filename();
 | |
| 
 | |
|     std::string folder = p.string();
 | |
|     std::uint64_t folderHash = generateHash(folder, std::string());
 | |
| 
 | |
|     auto it = mFolders.find(folderHash);
 | |
|     if (it == mFolders.end())
 | |
|         return FileRecord(); // folder not found, return default which has offset of sInvalidOffset
 | |
| 
 | |
|     std::uint64_t fileHash = generateHash(stem, ext);
 | |
|     auto iter = it->second.files.find(fileHash);
 | |
|     if (iter == it->second.files.end())
 | |
|         return FileRecord(); // file not found, return default which has offset of sInvalidOffset
 | |
| 
 | |
|     return iter->second;
 | |
| }
 | |
| 
 | |
| Files::IStreamPtr CompressedBSAFile::getFile(const FileStruct* file) 
 | |
| {
 | |
|     FileRecord fileRec = getFileRecord(file->name());
 | |
|     if (!fileRec.isValid()) {
 | |
|         fail("File not found: " + std::string(file->name()));
 | |
|     }
 | |
|     return getFile(fileRec);
 | |
| }
 | |
| 
 | |
| void CompressedBSAFile::addFile(const std::string& filename, std::istream& file)
 | |
| {
 | |
|     assert(false); //not implemented yet
 | |
|     fail("Add file is not implemented for compressed BSA: " + filename);
 | |
| }
 | |
| 
 | |
| Files::IStreamPtr CompressedBSAFile::getFile(const char* file)
 | |
| {
 | |
|     FileRecord fileRec = getFileRecord(file);
 | |
|     if (!fileRec.isValid()) {
 | |
|         fail("File not found: " + std::string(file));
 | |
|     }
 | |
|     return getFile(fileRec);
 | |
| }
 | |
| 
 | |
| Files::IStreamPtr CompressedBSAFile::getFile(const FileRecord& fileRecord)
 | |
| {
 | |
|     size_t size = fileRecord.getSizeWithoutCompressionFlag();
 | |
|     size_t uncompressedSize = size;
 | |
|     bool compressed = fileRecord.isCompressed(mCompressedByDefault);
 | |
|     Files::IStreamPtr streamPtr = Files::openConstrainedFileStream(mFilename.c_str(), fileRecord.offset, size);
 | |
|     std::istream* fileStream = streamPtr.get();
 | |
|     if (mEmbeddedFileNames)
 | |
|     {
 | |
|         // Skip over the embedded file name
 | |
|         char length = 0;
 | |
|         fileStream->read(&length, 1);
 | |
|         fileStream->ignore(length);
 | |
|         size -= length + sizeof(char);
 | |
|     }
 | |
|     if (compressed)
 | |
|     {
 | |
|         fileStream->read(reinterpret_cast<char*>(&uncompressedSize), sizeof(uint32_t));
 | |
|         size -= sizeof(uint32_t);
 | |
|     }
 | |
|     std::shared_ptr<Bsa::MemoryInputStream> memoryStreamPtr = std::make_shared<MemoryInputStream>(uncompressedSize);
 | |
| 
 | |
|     if (compressed)
 | |
|     {
 | |
|         if (mVersion != 0x69) // Non-SSE: zlib
 | |
|         {
 | |
|             boost::iostreams::filtering_streambuf<boost::iostreams::input> inputStreamBuf;
 | |
|             inputStreamBuf.push(boost::iostreams::zlib_decompressor());
 | |
|             inputStreamBuf.push(*fileStream);
 | |
| 
 | |
|             boost::iostreams::basic_array_sink<char> sr(memoryStreamPtr->getRawData(), uncompressedSize);
 | |
|             boost::iostreams::copy(inputStreamBuf, sr);
 | |
|         }
 | |
|         else // SSE: lz4
 | |
|         {
 | |
|             boost::scoped_array<char> buffer(new char[size]);
 | |
|             fileStream->read(buffer.get(), size);
 | |
|             LZ4F_decompressionContext_t context = nullptr;
 | |
|             LZ4F_createDecompressionContext(&context, LZ4F_VERSION);
 | |
|             LZ4F_decompressOptions_t options = {};
 | |
|             LZ4F_errorCode_t errorCode = LZ4F_decompress(context, memoryStreamPtr->getRawData(), &uncompressedSize, buffer.get(), &size, &options);
 | |
|             if (LZ4F_isError(errorCode))
 | |
|                 fail("LZ4 decompression error (file " + mFilename + "): " + LZ4F_getErrorName(errorCode));
 | |
|             errorCode = LZ4F_freeDecompressionContext(context);
 | |
|             if (LZ4F_isError(errorCode))
 | |
|                 fail("LZ4 decompression error (file " + mFilename + "): " + LZ4F_getErrorName(errorCode));
 | |
|         }
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         fileStream->read(memoryStreamPtr->getRawData(), size);
 | |
|     }
 | |
| 
 | |
|     return std::shared_ptr<std::istream>(memoryStreamPtr, (std::istream*)memoryStreamPtr.get());
 | |
| }
 | |
| 
 | |
| BsaVersion CompressedBSAFile::detectVersion(const std::string& filePath)
 | |
| {
 | |
|     namespace bfs = boost::filesystem;
 | |
|     bfs::ifstream input(bfs::path(filePath), std::ios_base::binary);
 | |
| 
 | |
|     // Total archive size
 | |
|     std::streamoff fsize = 0;
 | |
|     if (input.seekg(0, std::ios_base::end))
 | |
|     {
 | |
|         fsize = input.tellg();
 | |
|         input.seekg(0);
 | |
|     }
 | |
| 
 | |
|     if (fsize < 12) {
 | |
|         return BSAVER_UNKNOWN;
 | |
|     }
 | |
| 
 | |
|     // Get essential header numbers
 | |
| 
 | |
|     // First 12 bytes
 | |
|     uint32_t head[3];
 | |
| 
 | |
|     input.read(reinterpret_cast<char*>(head), 12);
 | |
| 
 | |
|     if (head[0] == static_cast<uint32_t>(BSAVER_UNCOMPRESSED)) {
 | |
|         return BSAVER_UNCOMPRESSED;
 | |
|     }
 | |
| 
 | |
|     if (head[0] == static_cast<uint32_t>(BSAVER_COMPRESSED)) {
 | |
|         return BSAVER_COMPRESSED;
 | |
|     }
 | |
| 
 | |
|     return BSAVER_UNKNOWN;
 | |
| }
 | |
| 
 | |
| //mFiles used by OpenMW expects uncompressed sizes
 | |
| void CompressedBSAFile::convertCompressedSizesToUncompressed()
 | |
| {
 | |
|     for (auto & mFile : mFiles)
 | |
|     {
 | |
|         const FileRecord& fileRecord = getFileRecord(mFile.name());
 | |
|         if (!fileRecord.isValid())
 | |
|         {
 | |
|             fail("Could not find file " + std::string(mFile.name()) + " in BSA");
 | |
|         }
 | |
| 
 | |
|         if (!fileRecord.isCompressed(mCompressedByDefault))
 | |
|         {
 | |
|             //no need to fix fileSize in mFiles - uncompressed size already set
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         Files::IStreamPtr dataBegin = Files::openConstrainedFileStream(mFilename.c_str(), fileRecord.offset, fileRecord.getSizeWithoutCompressionFlag());
 | |
| 
 | |
|         if (mEmbeddedFileNames)
 | |
|         {
 | |
|             std::string embeddedFileName;
 | |
|             getBZString(embeddedFileName, *(dataBegin.get()));
 | |
|         }
 | |
| 
 | |
|         dataBegin->read(reinterpret_cast<char*>(&(mFile.fileSize)), sizeof(mFile.fileSize));
 | |
|     }
 | |
| }
 | |
| 
 | |
| std::uint64_t CompressedBSAFile::generateHash(std::string stem, std::string extension)
 | |
| {
 | |
|     size_t len = stem.length();
 | |
|     if (len == 0)
 | |
|         return 0;
 | |
|     std::replace(stem.begin(), stem.end(), '/', '\\');
 | |
|     Misc::StringUtils::lowerCaseInPlace(stem);
 | |
|     uint64_t result = stem[len-1] | (len >= 3 ? (stem[len-2] << 8) : 0) | (len << 16) | (stem[0] << 24);
 | |
|     if (len >= 4)
 | |
|     {
 | |
|         uint32_t hash = 0;
 | |
|         for (size_t i = 1; i <= len-3; ++i)
 | |
|             hash = hash * 0x1003f + stem[i];
 | |
|         result += static_cast<uint64_t>(hash) << 32;
 | |
|     }
 | |
|     if (extension.empty())
 | |
|         return result;
 | |
|     Misc::StringUtils::lowerCaseInPlace(extension);
 | |
|     if (extension == ".kf")       result |= 0x80;
 | |
|     else if (extension == ".nif") result |= 0x8000;
 | |
|     else if (extension == ".dds") result |= 0x8080;
 | |
|     else if (extension == ".wav") result |= 0x80000000;
 | |
|     uint32_t hash = 0;
 | |
|     for (const char &c : extension)
 | |
|         hash = hash * 0x1003f + c;
 | |
|     result += static_cast<uint64_t>(hash) << 32;
 | |
|     return result;
 | |
| }
 | |
| 
 | |
| } //namespace Bsa
 |