You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
openmw-tes3mp/bsa/bsafile.d

367 lines
10 KiB
D

/*
OpenMW - The completely unofficial reimplementation of Morrowind
Copyright (C) 2008 Nicolay Korslund
Email: < korslund@gmail.com >
WWW: http://openmw.snaptoad.com/
This file (bsafile.d) is part of the OpenMW package.
OpenMW is distributed as free software: you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 3, as published by the Free Software Foundation.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
version 3 along with this program. If not, see
http://www.gnu.org/licenses/ .
*/
module bsa.bsafile;
//debug=checkHash;
// This file does not have any unit tests, since you really need the
// data to test it. Use the program named 'bsatool', it uses the NIF
// reader library and scans through a bsa archive, providing a good
// test of both libraries.
//import std.stream;
import std.stdio;
import std.string;
import std.mmfile;
import core.memory;
import monster.util.aa;
// Exception type thrown for errors encountered while handling a BSA
// archive. The given message is prefixed with "BSAFileException: ".
class BSAFileException : Exception
{
  this(char[] msg)
  {
    super("BSAFileException: " ~ msg);
  }
}
/**
* This class is used to read "Bethesda Archive Files", or BSAs.
*
* The BSA archives are typically held open for the entire lifetime of
* the application, and are accessed more or less randomly. For that
* reason the BSAFile class uses memory mapped files. However, to be
* reasonably memory efficient, only the last requested file is
* guaranteed to be mapped at any given time, therefore make sure you
* don't use any persistent slices.
*
*/
class BSAFile
{
private:
// Window size handed to the memory mapped file. A value of 0 would
// map the entire file at once, which uses a LOT of system memory.
// A small value carries no real penalty, since several blocks may
// be mapped simultaneously, but the value MUST be a multiple of the
// system page size.
//
// TODO: The real value should be queried from the system
// (getpagesize on UNIX, the allocation "granularity" on Windows).
// For now 4K is assumed to be fine on UNIX, while Windows needs
// 64K. The same constants are duplicated in util.c_mmfile; if this
// is ever made smarter, the logic should live in one place.
version(Windows)
{
  static int pageSize = 64*1024;
}
else
{
  static int pageSize = 4*1024;
}
// Represents one file entry in the archive
struct FileStruct
{
  // File size and offset in file. We store the offset from the
  // beginning of the file, not the offset into the data buffer
  // (which is what is stored in the archive.)
  uint fileSize, offset;

  // Name of the entry. After getName() this is a slice of the buffer
  // passed to it (no copy is made).
  char[] name;

  // Sets 'name' to the zero-terminated string that begins at
  // buf[start]. The terminating zero is not included in the slice.
  //
  // Throws BSAFileException if 'start' lies outside 'buf', or if no
  // terminating zero is found before the end of 'buf'.
  void getName(char[] buf, uint start)
  {
    if(start >= buf.length)
      throw new BSAFileException("Name offset outside buffer");

    uint end = start;
    // We have to search for the end of the name string, marked by a zero.
    for(; end<buf.length; end++)
      if(buf[end] == 0) break;

    if(end == buf.length)
      throw new BSAFileException("String buffer overflow");

    name = buf[start..end];
  }

  // This currently isn't needed, but it would be if we wanted to
  // write our own bsa archives.
  debug(checkHash)
  {
    // Computes the two 32 bit hash values of 'name'. hash1 is built
    // from the first half of the name, hash2 from the second half.
    void hashName(out uint hash1, out uint hash2)
    {
      uint sum, off, temp, n;

      foreach(char c; name[0..$/2])
        {
          sum ^= (cast(uint)c) << (off & 31);
          off += 8;
        }
      hash1 = sum;

      sum = off = 0;

      foreach(char c; name[$/2..$])
        {
          temp = (cast(uint)c) << (off & 31);
          sum ^= temp;
          n = temp & 0x1F;

          // "Rotate right" by n bits. The left shift count is masked
          // to 0..31 because shifting a 32 bit value by 32 (the n == 0
          // case) is not a legal shift; with the mask a rotation by
          // zero leaves sum unchanged.
          sum = (sum << ((32-n) & 31)) | (sum >> n);
          off += 8;
        }
      hash2 = sum;
    }
  }
}
// Handle to the memory mapped archive file
MmFile mmf;

// Name of the archive file, as passed to open()
char[] filename;

// The file table, one entry per file in the archive
FileStruct[] files;

// Set to true once read() has loaded the directory
bool isLoaded;

// Hash table mapping file names to indices into 'files', for fast
// name lookup. The CITextHash is a case-insensitive text hasher,
// meaning that all lookups are case insensitive.
HashTable!(char[], int, ESMRegionAlloc, CITextHash) lookup;
// Error exit used throughout the class: throws a BSAFileException
// whose message is 'msg' followed by the name of the archive file.
void fail(char[] msg)
{
  char[] text = msg ~ "\nFile: " ~ filename;
  throw new BSAFileException(text);
}
// Parses the header and directory of the mapped archive, filling in
// the file table ('files') and the name lookup table ('lookup').
// Asserts that no archive has been loaded yet, and sets isLoaded on
// success. Calls fail() (which throws BSAFileException) when the
// header or directory is inconsistent with the size of the file.
void read()
{
  /*
   * The layout of a BSA archive is as follows:
   *
   * - 12 bytes header, contains 3 ints:
   *         id number - equal to 0x100
   *         dirsize - size of the directory block (see below)
   *         numfiles - number of files
   *
   * ---------- start of directory block -----------
   *
   * - 8 bytes*numfiles, each record contains
   *         fileSize
   *         offset into data buffer (see below)
   *
   * - 4 bytes*numfiles, each record is an offset into the following name buffer
   *
   * - name buffer, indexed by the previous table, each string is
   *   null-terminated. Size is (dirsize - 12*numfiles).
   *
   * ---------- end of directory block -------------
   *
   * - 8*numfiles - hash table block, we currently ignore this
   *
   * Data buffer:
   *
   * - The rest of the archive is file data, indexed by the
   *   offsets in the directory block. The offsets start at 0 at
   *   the beginning of this buffer.
   *
   */
  assert(!isLoaded);

  ulong fsize = mmf.length;

  if( fsize < 12 )
    fail("File too small to be a valid BSA archive");

  // Recast the file header as a list of uints. All three header
  // values are read out before the next slice of the mapping is
  // requested.
  uint[] array = cast(uint[]) mmf[0..12];

  if(array[0] != 0x100)
    fail("Unrecognized BSA header");

  // Total number of bytes used in size/offset-table + filename
  // sections.
  uint dirsize = array[1];
  debug writefln("Directory size: %s", dirsize);

  // Number of files
  uint filenum = array[2];
  debug writefln("Number of files: %s", filenum);

  // The header values come straight from the file, so every size
  // check below is done in 64 bit arithmetic. In 32 bits, products
  // like filenum*21 can wrap around and let a corrupt header pass.
  ulong dataStart = 12UL + dirsize + 8UL*filenum;

  // Each file must take up at least 21 bytes of data in the
  // bsa. So if files*21 overflows the file size then we are
  // guaranteed that the archive is corrupt.
  if( (21UL*filenum > fsize - 12) ||
      (dataStart > fsize) )
    fail("Directory information larger than entire archive");

  // The directory block must at least hold the size/offset table and
  // the name offset table, otherwise the name buffer slice below
  // would have its start beyond its end.
  if( 12UL*filenum > dirsize )
    fail("Directory block too small for its file tables");

  // File offsets are stored as 32 bit values, so the start of the
  // data buffer has to be representable as one.
  if( dataStart > uint.max )
    fail("Archive directory does not fit in 32 bit offsets");

  // Map the entire directory (skip the hashes if we don't need them)
  debug(checkHash)
    void[] mm = mmf[12..(12+dirsize+8*filenum)];
  else
    void[] mm = mmf[12..(12+dirsize)];

  // Allocate the file list from esmRegion
  files = esmRegion.allocateT!(FileStruct)(filenum);

  // Calculate the offset of the data buffer. All file offsets are
  // relative to this. The cast is safe, dataStart was checked above.
  uint fileDataOffset = cast(uint) dataStart;

  // Get a slice of the size/offset table
  array = cast(uint[])mm[0..(8*filenum)];
  size_t index = 0; // Used for indexing array[]

  // Read the size/offset table
  foreach(ref FileStruct fs; files)
    {
      fs.fileSize = array[index++];

      // Compute the absolute position in 64 bits so that a huge
      // offset or size cannot wrap around and appear to be valid.
      ulong start = cast(ulong)array[index++] + fileDataOffset;
      ulong end = start + fs.fileSize;
      if(end > fsize || end > uint.max)
        fail("Archive contains files outside itself");

      fs.offset = cast(uint) start;
    }

  // Get a slice of the name offset table
  array = cast(uint[])mm[(8*filenum)..(12*filenum)];

  // Get a slice of the name field
  char[] nameBuf = cast(char[])mm[(12*filenum)..dirsize];
  // And copy it!
  nameBuf = esmRegion.copy(nameBuf);

  // Tell the lookup table how big it should be
  lookup.rehash(filenum);

  // Loop through the name offsets and pick out the names
  foreach(int idx, ref FileStruct fs; files)
    {
      fs.getName(nameBuf, array[idx]);
      lookup[fs.name] = idx;
      debug(2) writefln("%d: %s, %d bytes at %x", idx,
                        fs.name, fs.fileSize, fs.offset);
    }

  // Code to check if file hashes are correct - this was mostly
  // used to check our hash algorithm.
  debug(checkHash)
    {
      // Slice the Hash table
      array = cast(uint[])mm[dirsize..(dirsize+8*filenum)];
      index = 0;
      foreach(ref FileStruct fs; files)
        {
          uint h1, h2;
          fs.hashName(h1,h2);
          uint h11 = array[index++];
          uint h22 = array[index++];
          if(h1 != h11) writefln("1 : %d vs. %d", h1, h11);
          if(h2 != h22) writefln("2 : %d vs. %d", h2, h22);
        }
    }

  isLoaded = true;
}
public:
/* -----------------------------------
* BSA management methods
* -----------------------------------
*/
// Creates an archive object with no file opened.
this()
{
}
// Creates an archive object and opens 'file' right away.
this(char[] file)
{
  open(file);
}
// Clean up after ourselves when the object is destroyed
~this()
{
  clear();
}
// Opens the archive 'file' and loads its header and file directory.
// Any archive that was open before is cleared first.
void open(char[] file)
{
  // Drop whatever was loaded previously
  clear();
  filename = file;

  // Set up a read only memory mapping. The size argument of 0 asks
  // for the entire file, and null means we don't care where it is
  // mapped. Passing pageSize as the window makes sure the file is
  // NOT mapped in its entirety, which would use too much memory.
  mmf = new MmFile(file, MmFile.Mode.Read, 0, null, pageSize);

  // Load header and file directory
  read();
}
// Closes the archive file and resets the object to the "nothing
// loaded" state.
void clear()
{
  // Close the memory mapped file
  delete mmf;

  // Empty the name lookup table and the file table
  lookup.reset();
  files.length = 0;

  isLoaded = false;
}
/* -----------------------------------
* Archive file routines
* -----------------------------------
*/
// Returns the contents of file number 'index' as a slice of the
// memory mapped archive. Fails (throws through fail()) if no archive
// is open or if 'index' is not a valid position in the file table.
void[] findSlice(int index)
{
  if(!isLoaded)
    fail("No archive is open");

  if(!(index >= 0 && index < files.length))
    fail("Index out of bounds");

  // Get a slice of the buffer that comprises this file
  FileStruct* entry = &files[index];
  uint first = entry.offset;
  uint last = first + entry.fileSize;
  return mmf[first..last];
}
// Looks up 'name' in the lookup table. Returns the index of the file
// in the file table, or -1 if the archive has no file by that name.
int getIndex(char[] name)
{
  int found;
  return lookup.inList(name, found) ? found : -1;
}
// Returns the contents of the file called 'name' as a slice, or null
// if the archive contains no such file. The name is resolved with
// getIndex(); no case conversion is performed here.
void[] findSlice(char[] name)
{
  int idx = getIndex(name);
  return (idx == -1) ? null : findSlice(idx);
}
// Gives access to the file table itself (not a copy). Used by the
// 'bsatest' program to loop through the entire archive.
FileStruct[] getFiles()
{
  return files;
}
// Number of files in the archive. The array length is a size_t, so
// it is narrowed explicitly; the file count is read from a 32 bit
// header field in read(), so the value fits in a uint.
uint numFiles()
{
  return cast(uint) files.length;
}
// Returns the file name of the archive, as it was given to open().
char[] getName()
{
  return filename;
}
}