First pass at optimizing the NIF parsing functions that read from the file stream

2025-07-12 21:21:42 +00:00 · 2017-09-20 23:53:12 -05:00 · 2017-09-20 23:53:12 -05:00 · c72aa19d6d
commit c72aa19d6d
parent 8c6a8ca48d
3 changed files with 152 additions and 154 deletions
--- a/components/files/constrainedfilestream.cpp
+++ b/components/files/constrainedfilestream.cpp
@ -8,7 +8,7 @@
 namespace
 {
 // somewhat arbitrary though 64KB buffers didn't seem to improve performance any
-const size_t sBufferSize = 4096;
+const size_t sBufferSize = 8192;
 }

 namespace Files
--- a/components/nif/nifstream.cpp
+++ b/components/nif/nifstream.cpp
@ -6,138 +6,8 @@ namespace Nif
 {

 //Private functions
-uint8_t NIFStream::read_byte()
-{
-    uint8_t byte;
-    inp->read((char*)&byte, 1);
-    return byte;
-}
-uint16_t NIFStream::read_le16()
-{
-    uint8_t buffer[2];
-    inp->read((char*)buffer, 2);
-    return buffer[0] | (buffer[1]<<8);
-}
-uint32_t NIFStream::read_le32()
-{
-    uint8_t buffer[4];
-    inp->read((char*)buffer, 4);
-    return buffer[0] | (buffer[1]<<8) | (buffer[2]<<16) | (buffer[3]<<24);
-}
-float NIFStream::read_le32f()
-{
-    union {
-        uint32_t i;
-        float f;
-    } u = { read_le32() };
-    return u.f;
-}
+

 //Public functions
-osg::Vec2f NIFStream::getVector2()
-{
-    osg::Vec2f vec;
-    for(size_t i = 0;i < 2;i++)
-        vec._v[i] = getFloat();
-    return vec;
-}
-osg::Vec3f NIFStream::getVector3()
-{
-    osg::Vec3f vec;
-    for(size_t i = 0;i < 3;i++)
-        vec._v[i] = getFloat();
-    return vec;
-}
-osg::Vec4f NIFStream::getVector4()
-{
-    osg::Vec4f vec;
-    for(size_t i = 0;i < 4;i++)
-        vec._v[i] = getFloat();
-    return vec;
-}
-Matrix3 NIFStream::getMatrix3()
-{
-    Matrix3 mat;
-    for(size_t i = 0;i < 3;i++)
-    {
-        for(size_t j = 0;j < 3;j++)
-            mat.mValues[i][j] = getFloat();
-    }
-    return mat;
-}
-osg::Quat NIFStream::getQuaternion()
-{
-    osg::Quat quat;
-    quat.w() = getFloat();
-    quat.x() = getFloat();
-    quat.y() = getFloat();
-    quat.z() = getFloat();
-    return quat;
-}
-Transformation NIFStream::getTrafo()
-{
-    Transformation t;
-    t.pos = getVector3();
-    t.rotation = getMatrix3();
-    t.scale = getFloat();
-    return t;
-}
-
-std::string NIFStream::getString(size_t length)
-{
-    std::vector<char> str (length+1, 0);
-
-    inp->read(&str[0], length);
-
-    return &str[0];
-}
-std::string NIFStream::getString()
-{
-    size_t size = read_le32();
-    return getString(size);
-}
-std::string NIFStream::getVersionString()
-{
-    std::string result;
-    std::getline(*inp, result);
-    return result;
-}
-
-void NIFStream::getUShorts(std::vector<unsigned short> &vec, size_t size)
-{
-    vec.resize(size);
-    for(size_t i = 0;i < vec.size();i++)
-        vec[i] = getUShort();
-}
-void NIFStream::getFloats(std::vector<float> &vec, size_t size)
-{
-    vec.resize(size);
-    for(size_t i = 0;i < vec.size();i++)
-        vec[i] = getFloat();
-}
-void NIFStream::getVector2s(std::vector<osg::Vec2f> &vec, size_t size)
-{
-    vec.resize(size);
-    for(size_t i = 0;i < vec.size();i++)
-        vec[i] = getVector2();
-}
-void NIFStream::getVector3s(std::vector<osg::Vec3f> &vec, size_t size)
-{
-    vec.resize(size);
-    for(size_t i = 0;i < vec.size();i++)
-        vec[i] = getVector3();
-}
-void NIFStream::getVector4s(std::vector<osg::Vec4f> &vec, size_t size)
-{
-    vec.resize(size);
-    for(size_t i = 0;i < vec.size();i++)
-        vec[i] = getVector4();
-}
-void NIFStream::getQuaternions(std::vector<osg::Quat> &quat, size_t size)
-{
-    quat.resize(size);
-    for(size_t i = 0;i < quat.size();i++)
-        quat[i] = getQuaternion();
-}

 }
--- a/components/nif/nifstream.hpp
+++ b/components/nif/nifstream.hpp
@ -2,7 +2,7 @@

 #ifndef OPENMW_COMPONENTS_NIF_NIFSTREAM_HPP
 #define OPENMW_COMPONENTS_NIF_NIFSTREAM_HPP
-
+#include <xmmintrin.h>
 #include <cassert>
 #include <stdint.h>
 #include <stdexcept>
@ -26,10 +26,44 @@ class NIFStream {
    /// Input stream
    Files::IStreamPtr inp;

-    uint8_t read_byte();
-    uint16_t read_le16();
-    uint32_t read_le32();
-    float read_le32f();
+    uint8_t read_byte() {
+        uint8_t byte;
+        inp->read((char*)&byte, 1);
+        return byte;
+    }
+
+    uint16_t read_le16() { // reads 2 bytes from inp and returns them as a little-endian 16-bit value
+        uint8_t buffer[2] = {}; // zero-filled so bytes the stream fails to deliver are 0 rather than indeterminate
+        inp->read((char*)buffer, 2);
+        return static_cast<uint16_t>(buffer[0] | (buffer[1] << 8)); // byte-wise assembly: same result on any host byte order, no uint16_t* aliasing cast
+    }
+    uint32_t read_le32() { // reads 4 bytes from inp and returns them as a little-endian 32-bit value
+        uint8_t buffer[4] = {}; // zero-filled so bytes the stream fails to deliver are 0 rather than indeterminate
+        inp->read((char*)buffer, 4);
+        return uint32_t(buffer[0]) | (uint32_t(buffer[1]) << 8) | (uint32_t(buffer[2]) << 16) | (uint32_t(buffer[3]) << 24); // widen to unsigned before shifting; no uint32_t* aliasing cast
+    }
+    uint64_t read_le64() {
+        alignas(8) uint8_t buffer[8];
+        inp->read((char*)buffer, 8);
+        return static_cast<uint64_t>(*((uint64_t*)buffer));
+    }
+    __m128 read_le96() { // reads 12 bytes from inp into the first 12 bytes of a 16-byte SSE value
+        alignas(16) uint8_t buffer[16] = {}; // __m128 access requires 16-byte alignment; zero-fill so the 4 bytes never read are defined
+        inp->read((char*)buffer, 12);
+        return static_cast<__m128>(*((__m128*)buffer));
+    }
+    __m128 read_le128() {
+        alignas(16) uint8_t buffer[16];
+        inp->read((char*)buffer, 16);
+        return static_cast<__m128>(*((__m128*)buffer));
+    }
+    float read_le32f() {
+        union {
+            uint32_t i;
+            float f;
+        } u = { read_le32() };
+        return u.f;
+    }

 public:

@ -46,26 +80,120 @@ public:
    unsigned int getUInt() { return read_le32(); }
    float getFloat() { return read_le32f(); }

-    osg::Vec2f getVector2();
-    osg::Vec3f getVector3();
-    osg::Vec4f getVector4();
-    Matrix3 getMatrix3();
-    osg::Quat getQuaternion();
-    Transformation getTrafo();
+    osg::Vec2f getVector2() {
+        union {
+            uint64_t i;
+            float f[2];
+        } u = { read_le64() };
+        osg::Vec2f vec;
+        for (size_t i = 0;i < 2;i++)
+            vec._v[i] = u.f[i];
+        return vec;
+    }
+    osg::Vec3f getVector3() {
+        union {
+            __m128 i;
+            float f[4];
+        } u = { read_le96() };
+        osg::Vec3f vec;
+        for (size_t i = 0;i < 3;i++)
+            vec._v[i] = u.f[i];
+        return vec;
+    }
+    osg::Vec4f getVector4() {
+        union {
+            __m128 i;
+            float f[4];
+        } u = { read_le128() };
+        osg::Vec4f vec;
+        for (size_t i = 0;i < 4;i++)
+            vec._v[i] = u.f[i];
+        return vec;
+    }
+    Matrix3 getMatrix3() {
+        Matrix3 mat;
+        alignas(16) union {
+            float f[9];
+            uint8_t buffer[36];
+        } u;
+        inp->read((char*)u.buffer, 36);
+        for (size_t i = 0;i < 3;i++)
+        {
+            for (size_t j = 0;j < 3;j++)
+                mat.mValues[i][j] = u.f[3*i+j];
+        }
+        return mat;
+    }
+    osg::Quat getQuaternion() {
+        union {
+            __m128 i;
+            float f[4];
+        } u = { read_le128() };
+        osg::Quat quat;
+        quat.w() = u.f[0];
+        quat.x() = u.f[1];
+        quat.y() = u.f[2];
+        quat.z() = u.f[3];
+        return quat;
+    }
+    Transformation getTrafo() {
+        Transformation t;
+        t.pos = getVector3();
+        t.rotation = getMatrix3();
+        t.scale = getFloat();
+        return t;
+    }

    ///Read in a string of the given length
-    std::string getString(size_t length);
-    ///Read in a string of the length specified in the file
-    std::string getString();
-    ///This is special since the version string doesn't start with a number, and ends with "\n"
-    std::string getVersionString();
+    std::string getString(size_t length) {
+        std::vector<char> str(length + 1, 0);

-    void getUShorts(std::vector<unsigned short> &vec, size_t size);
-    void getFloats(std::vector<float> &vec, size_t size);
-    void getVector2s(std::vector<osg::Vec2f> &vec, size_t size);
-    void getVector3s(std::vector<osg::Vec3f> &vec, size_t size);
-    void getVector4s(std::vector<osg::Vec4f> &vec, size_t size);
-    void getQuaternions(std::vector<osg::Quat> &quat, size_t size);
+        inp->read(&str[0], length);
+
+        return &str[0];
+    }
+    ///Read in a string of the length specified in the file
+    std::string getString() {
+        size_t size = read_le32();
+        return getString(size);
+    }
+    ///This is special since the version string doesn't start with a number, and ends with "\n"
+    std::string getVersionString() {
+        std::string result;
+        std::getline(*inp, result);
+        return result;
+    }
+
+    void getUShorts(std::vector<unsigned short> &vec, size_t size) {
+        vec.resize(size);
+        for (size_t i = 0;i < vec.size();i++)
+            vec[i] = getUShort();
+    }
+    void getFloats(std::vector<float> &vec, size_t size) {
+        vec.resize(size);
+        for (size_t i = 0;i < vec.size();i++)
+            vec[i] = getFloat();
+    }
+    void getVector2s(std::vector<osg::Vec2f> &vec, size_t size) {
+        vec.resize(size);
+        for (size_t i = 0;i < vec.size();i++)
+            vec[i] = getVector2();
+    }
+    void getVector3s(std::vector<osg::Vec3f> &vec, size_t size) {
+        vec.resize(size);
+        for (size_t i = 0;i < vec.size();i++)
+            vec[i] = getVector3();
+    }
+    void getVector4s(std::vector<osg::Vec4f> &vec, size_t size) {
+        vec.resize(size);
+        for (size_t i = 0;i < vec.size();i++)
+            vec[i] = getVector4();
+    }
+    void getQuaternions(std::vector<osg::Quat> &quat, size_t size) {
+        quat.resize(size);
+        for (size_t i = 0;i < quat.size();i++)
+            quat[i] = getQuaternion();
+    }
 };

 }