Merge branch 'utf8_encoder_string_view' into 'master'

Use std::string_view for argument and return type of Utf8Encoder functions. See merge request OpenMW/openmw!1652
2025-11-08 21:46:43 +00:00 · 2022-02-12 18:07:26 +00:00 · 2022-02-12 18:07:26 +00:00 · cd8967e265
commit cd8967e265
parent 17ecaf177a c75e938c46
5 changed files with 45 additions and 37 deletions
--- a/components/esm3/esmreader.cpp
+++ b/components/esm3/esmreader.cpp
@ -320,7 +320,7 @@ std::string ESMReader::getString(int size)
    // Convert to UTF8 and return
    if (mEncoder)
-        return mEncoder->getUtf8(ptr, size);
+        return std::string(mEncoder->getUtf8(std::string_view(ptr, size)));
    return std::string (ptr, size);
 }
--- a/components/esm3/esmwriter.cpp
+++ b/components/esm3/esmwriter.cpp
@ -193,9 +193,9 @@ namespace ESM
        else
        {
            // Convert to UTF8 and return
-            std::string string = mEncoder ? mEncoder->getLegacyEnc(data) : data;
+            const std::string_view string = mEncoder != nullptr ? mEncoder->getLegacyEnc(data) : data;
-            write(string.c_str(), string.size());
+            write(string.data(), string.size());
        }
    }
--- a/components/fontloader/fontloader.cpp
+++ b/components/fontloader/fontloader.cpp
@ -1,6 +1,8 @@
 #include "fontloader.hpp"
 #include <stdexcept>
 #include <string_view>
 #include <array>
 #include <osg/Image>
@ -26,7 +28,7 @@
 namespace
 {
-    unsigned long utf8ToUnicode(const std::string& utf8)
+    unsigned long utf8ToUnicode(std::string_view utf8)
    {
        size_t i = 0;
        unsigned long unicode;
@ -116,16 +118,21 @@ namespace
        }
    }
-    // getUtf8, aka the worst function ever written.
+    // getUnicode includes various hacks for dealing with Morrowind's .fnt files that are *mostly*
-    // This includes various hacks for dealing with Morrowind's .fnt files that are *mostly*
    // in the expected win12XX encoding, but also have randomly swapped characters sometimes.
    // Looks like the Morrowind developers found standard encodings too boring and threw in some twists for fun.
-    std::string getUtf8 (unsigned char c, ToUTF8::Utf8Encoder& encoder, ToUTF8::FromType encoding)
+    unsigned long getUnicode(unsigned char c, ToUTF8::Utf8Encoder& encoder, ToUTF8::FromType encoding)
    {
        if (encoding == ToUTF8::WINDOWS_1250) // Hack for polish font
-            return encoder.getUtf8(std::string(1, mapUtf8Char(c)));
+        {
            const std::array<char, 2> str {static_cast<char>(mapUtf8Char(c)), '\0'};
            return utf8ToUnicode(encoder.getUtf8(std::string_view(str.data(), 1)));
        }
        else
-            return encoder.getUtf8(std::string(1, c));
+        {
            const std::array<char, 2> str {static_cast<char>(c), '\0'};
            return utf8ToUnicode(encoder.getUtf8(std::string_view(str.data(), 1)));
        }
    }
    [[noreturn]] void fail (Files::IStreamPtr file, const std::string& fileName, const std::string& message)
@ -355,7 +362,7 @@ namespace Gui
            float h  = data[i].bottom_left.y*height - y1;
            ToUTF8::Utf8Encoder encoder(mEncoding);
-            unsigned long unicodeVal = utf8ToUnicode(getUtf8(i, encoder, mEncoding));
+            unsigned long unicodeVal = getUnicode(i, encoder, mEncoding);
            MyGUI::xml::ElementPtr code = codes->createChild("Code");
            code->addAttribute("index", unicodeVal);
--- a/components/to_utf8/to_utf8.cpp
+++ b/components/to_utf8/to_utf8.cpp
@ -77,12 +77,15 @@ Utf8Encoder::Utf8Encoder(const FromType sourceEncoding):
    }
 }
-std::string Utf8Encoder::getUtf8(const char* input, size_t size)
+std::string_view Utf8Encoder::getUtf8(std::string_view input)
 {
    if (input.empty())
        return input;
    // Double check that the input string stops at some point (it might
    // contain zero terminators before this, inside its own data, which
    // is also ok.)
-    assert(input[size] == 0);
+    assert(input[input.size()] == 0);
    // Note: The rest of this function is designed for single-character
    // input encodings only. It also assumes that the input encoding
@ -93,19 +96,19 @@ std::string Utf8Encoder::getUtf8(const char* input, size_t size)
    // Compute output length, and check for pure ascii input at the same
    // time.
    bool ascii;
-    size_t outlen = getLength(input, ascii);
+    size_t outlen = getLength(input.data(), ascii);
    // If we're pure ascii, then don't bother converting anything.
    if(ascii)
-        return std::string(input, outlen);
+        return std::string_view(input.data(), outlen);
    // Make sure the output is large enough
    resize(outlen);
    char *out = &mOutput[0];
    // Translate
-    while (*input)
+    for (const char* ptr = input.data(); *ptr;)
-        copyFromArray(*(input++), out);
+        copyFromArray(*(ptr++), out);
    // Make sure that we wrote the correct number of bytes
    assert((out-&mOutput[0]) == (int)outlen);
@ -114,16 +117,18 @@ std::string Utf8Encoder::getUtf8(const char* input, size_t size)
    assert(mOutput.size() > outlen);
    assert(mOutput[outlen] == 0);
-    // Return a string
+    return std::string_view(mOutput.data(), outlen);
-    return std::string(&mOutput[0], outlen);
 }
-std::string Utf8Encoder::getLegacyEnc(const char *input, size_t size)
+std::string_view Utf8Encoder::getLegacyEnc(std::string_view input)
 {
    if (input.empty())
        return input;
    // Double check that the input string stops at some point (it might
    // contain zero terminators before this, inside its own data, which
    // is also ok.)
-    assert(input[size] == 0);
+    assert(input[input.size()] == 0);
    // TODO: The rest of this function is designed for single-character
    // input encodings only. It also assumes that the input
@ -134,19 +139,19 @@ std::string Utf8Encoder::getLegacyEnc(const char *input, size_t size)
    // Compute output length, and check for pure ascii input at the same
    // time.
    bool ascii;
-    size_t outlen = getLength2(input, ascii);
+    size_t outlen = getLength2(input.data(), ascii);
    // If we're pure ascii, then don't bother converting anything.
    if(ascii)
-        return std::string(input, outlen);
+        return std::string_view(input.data(), outlen);
    // Make sure the output is large enough
    resize(outlen);
    char *out = &mOutput[0];
    // Translate
-    while(*input)
+    for (const char* ptr = input.data(); *ptr;)
-        copyFromArray2(input, out);
+        copyFromArray2(ptr, out);
    // Make sure that we wrote the correct number of bytes
    assert((out-&mOutput[0]) == (int)outlen);
@ -155,8 +160,7 @@ std::string Utf8Encoder::getLegacyEnc(const char *input, size_t size)
    assert(mOutput.size() > outlen);
    assert(mOutput[outlen] == 0);
-    // Return a string
+    return std::string_view(mOutput.data(), outlen);
-    return std::string(&mOutput[0], outlen);
 }
 // Make sure the output vector is large enough for 'size' bytes,
--- a/components/to_utf8/to_utf8.hpp
+++ b/components/to_utf8/to_utf8.hpp
@ -4,6 +4,7 @@
 #include <string>
 #include <cstring>
 #include <vector>
 #include <string_view>
 namespace ToUTF8
 {
@ -27,18 +28,14 @@ namespace ToUTF8
        public:
            Utf8Encoder(FromType sourceEncoding);
-            // Convert to UTF8 from the previously given code page.
+            /// Convert to UTF8 from the previously given code page.
-            std::string getUtf8(const char *input, size_t size);
+            /// Returns a view to an internal buffer, invalidated by the next getUtf8 or getLegacyEnc call, if input is not
-            inline std::string getUtf8(const std::string &str)
+            /// an ASCII-only string. Otherwise returns a view to the input.
-            {
+            std::string_view getUtf8(std::string_view input);
-                return getUtf8(str.c_str(), str.size());
-            }
-            std::string getLegacyEnc(const char *input, size_t size);
+            /// Returns a view to an internal buffer, invalidated by the next getUtf8 or getLegacyEnc call, if input is not
-            inline std::string getLegacyEnc(const std::string &str)
+            /// an ASCII-only string. Otherwise returns a view to the input.
-            {
+            std::string_view getLegacyEnc(std::string_view input);
-                return getLegacyEnc(str.c_str(), str.size());
-            }
        private:
            void resize(size_t size);