From 30a213a9b3bccfcfad3e50b5eb96ecb50c6e8d70 Mon Sep 17 00:00:00 2001 From: Date: Fri, 22 Sep 2017 21:08:25 -0500 Subject: [PATCH] updates for nifstream optimization including fixing the non-x86 path for little endian reads --- components/nif/nifstream.hpp | 84 ++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 33 deletions(-) diff --git a/components/nif/nifstream.hpp b/components/nif/nifstream.hpp index 4d7e39ecb..290800042 100644 --- a/components/nif/nifstream.hpp +++ b/components/nif/nifstream.hpp @@ -14,6 +14,8 @@ #include #include +#include + #include "niftypes.hpp" namespace Nif @@ -21,49 +23,62 @@ namespace Nif class NIFFile; -template inline void readLittleEndianBufferOfType(Files::IStreamPtr &pIStream, T* dest) +/* + readLittleEndianBufferOfType: This template should only be used with non POD data types +*/ +template inline void readLittleEndianBufferOfType(Files::IStreamPtr &pIStream, T* dest) { #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) pIStream->read((char*)dest, numInstances * sizeof(T)); #else - char buffer[numInstances * sizeof(T)]; - pIStream->read((char*)buffer, numInstances * sizeof(T)); + uint8_t* destByteBuffer = (uint8_t*)dest; + pIStream->read((char*)dest, numInstances * sizeof(T)); /* Due to the loop iterations being known at compile time, this nested loop will most likely be unrolled + For example, for 2 instances of a 4 byte data type, you should get the below result */ + union { + IntegerT i; + T t; + } u; for (uint32_t i = 0; i < numInstances; i++) { - dest[i] = 0; + u = { 0 }; for (uint32_t byte = 0; byte < sizeof(T); byte++) - dest[i] |= ((T)buffer[i * sizeof(T) + byte]) << (byte * 8); + u.i |= (((IntegerT)destByteBuffer[i * sizeof(T) + byte]) << (byte * 8)); + dest[i] = u.t; } #endif } -template inline void readLittleEndianDynamicBufferOfType(Files::IStreamPtr &pIStream, T* dest, uint32_t numInstances) +/* + readLittleEndianDynamicBufferOfType: This template should only be used with non POD data types +*/ +template inline void readLittleEndianDynamicBufferOfType(Files::IStreamPtr &pIStream, T* dest, uint32_t numInstances) { #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) pIStream->read((char*)dest, numInstances * sizeof(T)); #else - char buffer[numInstances * sizeof(T)]; - pIStream->read((char*)buffer, numInstances * sizeof(T)); - /* - Due to the loop iterations being known at compile time, - this nested loop will most likely be unrolled - */ + uint8_t* destByteBuffer = (uint8_t*)dest; + pIStream->read((char*)dest, numInstances * sizeof(T)); + union { + IntegerT i; + T t; + } u; for (uint32_t i = 0; i < numInstances; i++) { - dest[i] = 0; + u.i = 0; for (uint32_t byte = 0; byte < sizeof(T); byte++) - dest[i] |= ((T)buffer[i * sizeof(T) + byte]) << (byte * 8); + u.i |= ((IntegerT)destByteBuffer[i * sizeof(T) + byte]) << (byte * 8); + dest[i] = u.t; } #endif } -template type inline readLittleEndianType(Files::IStreamPtr &pIStream) +template type inline readLittleEndianType(Files::IStreamPtr &pIStream) { type val; - readLittleEndianBufferOfType<1,type>(pIStream, (type*)&val); + readLittleEndianBufferOfType<1,type,IntegerT>(pIStream, (type*)&val); return val; } @@ -82,52 +97,52 @@ public: char getChar() { - return readLittleEndianType(inp); + return readLittleEndianType(inp); } short getShort() { - return readLittleEndianType(inp); + return readLittleEndianType(inp); } unsigned short getUShort() { - return readLittleEndianType(inp); + return readLittleEndianType(inp); } int getInt() { - return readLittleEndianType(inp); + return readLittleEndianType(inp); } unsigned int getUInt() { - return readLittleEndianType(inp); + return readLittleEndianType(inp); } float getFloat() { - return readLittleEndianType(inp); + return readLittleEndianType(inp); } osg::Vec2f getVector2() { osg::Vec2f vec; - readLittleEndianBufferOfType<2,float>(inp, (float*)&vec._v[0]); + readLittleEndianBufferOfType<2,float,uint32_t>(inp, (float*)&vec._v[0]); return vec; } osg::Vec3f getVector3() { osg::Vec3f vec; - readLittleEndianBufferOfType<3, float>(inp, (float*)&vec._v[0]); + readLittleEndianBufferOfType<3, float,uint32_t>(inp, (float*)&vec._v[0]); return vec; } osg::Vec4f getVector4() { osg::Vec4f vec; - readLittleEndianBufferOfType<4, float>(inp, (float*)&vec._v[0]); + readLittleEndianBufferOfType<4, float,uint32_t>(inp, (float*)&vec._v[0]); return vec; } Matrix3 getMatrix3() { Matrix3 mat; - readLittleEndianBufferOfType<9, float>(inp, (float*)&mat.mValues); + readLittleEndianBufferOfType<9, float,uint32_t>(inp, (float*)&mat.mValues); return mat; } osg::Quat getQuaternion() { float f[4]; - readLittleEndianBufferOfType<4, float>(inp, (float*)&f); + readLittleEndianBufferOfType<4, float,uint32_t>(inp, (float*)&f); osg::Quat quat; quat.w() = f[0]; quat.x() = f[1]; @@ -153,7 +168,7 @@ public: } ///Read in a string of the length specified in the file std::string getString() { - size_t size = readLittleEndianType(inp); + size_t size = readLittleEndianType(inp); return getString(size); } ///This is special since the version string doesn't start with a number, and ends with "\n" @@ -165,23 +180,26 @@ public: void getUShorts(std::vector &vec, size_t size) { vec.resize(size); - readLittleEndianDynamicBufferOfType(inp, &vec.front(), size); + readLittleEndianDynamicBufferOfType(inp, &vec.front(), size); } void getFloats(std::vector &vec, size_t size) { vec.resize(size); - readLittleEndianDynamicBufferOfType(inp, &vec.front(), size); + readLittleEndianDynamicBufferOfType(inp, &vec.front(), size); } void getVector2s(std::vector &vec, size_t size) { vec.resize(size); - readLittleEndianDynamicBufferOfType(inp, &vec.front(), size); + /* The packed storage of each Vec2f is 2 floats exactly */ + readLittleEndianDynamicBufferOfType(inp,(float*) &vec.front(), size*2); } void getVector3s(std::vector &vec, size_t size) { vec.resize(size); - readLittleEndianDynamicBufferOfType(inp, &vec.front(), size); + /* The packed storage of each Vec3f is 3 floats exactly */ + readLittleEndianDynamicBufferOfType(inp, (float*) &vec.front(), size*3); } void getVector4s(std::vector &vec, size_t size) { vec.resize(size); - readLittleEndianDynamicBufferOfType(inp, &vec.front(), size); + /* The packed storage of each Vec4f is 4 floats exactly */ + readLittleEndianDynamicBufferOfType(inp, (float*) &vec.front(), size*4); } void getQuaternions(std::vector &quat, size_t size) { quat.resize(size);