mirror of
https://github.com/OpenMW/openmw.git
synced 2025-01-21 07:53:53 +00:00
Merge branch 'utf8_encoder_string_view' into 'master'
Use std::string_view for argument and return type of Utf8Encoder functions. See merge request OpenMW/openmw!1652
This commit is contained in:
commit
cd8967e265
5 changed files with 45 additions and 37 deletions
|
@ -320,7 +320,7 @@ std::string ESMReader::getString(int size)
|
||||||
|
|
||||||
// Convert to UTF8 and return
|
// Convert to UTF8 and return
|
||||||
if (mEncoder)
|
if (mEncoder)
|
||||||
return mEncoder->getUtf8(ptr, size);
|
return std::string(mEncoder->getUtf8(std::string_view(ptr, size)));
|
||||||
|
|
||||||
return std::string (ptr, size);
|
return std::string (ptr, size);
|
||||||
}
|
}
|
||||||
|
|
|
@ -193,9 +193,9 @@ namespace ESM
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Convert to UTF8 and return
|
// Convert to UTF8 and return
|
||||||
std::string string = mEncoder ? mEncoder->getLegacyEnc(data) : data;
|
const std::string_view string = mEncoder != nullptr ? mEncoder->getLegacyEnc(data) : data;
|
||||||
|
|
||||||
write(string.c_str(), string.size());
|
write(string.data(), string.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
#include "fontloader.hpp"
|
#include "fontloader.hpp"
|
||||||
|
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
#include <string_view>
|
||||||
|
#include <array>
|
||||||
|
|
||||||
#include <osg/Image>
|
#include <osg/Image>
|
||||||
|
|
||||||
|
@ -26,7 +28,7 @@
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
unsigned long utf8ToUnicode(const std::string& utf8)
|
unsigned long utf8ToUnicode(std::string_view utf8)
|
||||||
{
|
{
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
unsigned long unicode;
|
unsigned long unicode;
|
||||||
|
@ -116,16 +118,21 @@ namespace
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// getUtf8, aka the worst function ever written.
|
// getUnicode includes various hacks for dealing with Morrowind's .fnt files that are *mostly*
|
||||||
// This includes various hacks for dealing with Morrowind's .fnt files that are *mostly*
|
|
||||||
// in the expected win12XX encoding, but also have randomly swapped characters sometimes.
|
// in the expected win12XX encoding, but also have randomly swapped characters sometimes.
|
||||||
// Looks like the Morrowind developers found standard encodings too boring and threw in some twists for fun.
|
// Looks like the Morrowind developers found standard encodings too boring and threw in some twists for fun.
|
||||||
std::string getUtf8 (unsigned char c, ToUTF8::Utf8Encoder& encoder, ToUTF8::FromType encoding)
|
unsigned long getUnicode(unsigned char c, ToUTF8::Utf8Encoder& encoder, ToUTF8::FromType encoding)
|
||||||
{
|
{
|
||||||
if (encoding == ToUTF8::WINDOWS_1250) // Hack for polish font
|
if (encoding == ToUTF8::WINDOWS_1250) // Hack for polish font
|
||||||
return encoder.getUtf8(std::string(1, mapUtf8Char(c)));
|
{
|
||||||
|
const std::array<char, 2> str {static_cast<char>(mapUtf8Char(c)), '\0'};
|
||||||
|
return utf8ToUnicode(encoder.getUtf8(std::string_view(str.data(), 1)));
|
||||||
|
}
|
||||||
else
|
else
|
||||||
return encoder.getUtf8(std::string(1, c));
|
{
|
||||||
|
const std::array<char, 2> str {static_cast<char>(c), '\0'};
|
||||||
|
return utf8ToUnicode(encoder.getUtf8(std::string_view(str.data(), 1)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[[noreturn]] void fail (Files::IStreamPtr file, const std::string& fileName, const std::string& message)
|
[[noreturn]] void fail (Files::IStreamPtr file, const std::string& fileName, const std::string& message)
|
||||||
|
@ -355,7 +362,7 @@ namespace Gui
|
||||||
float h = data[i].bottom_left.y*height - y1;
|
float h = data[i].bottom_left.y*height - y1;
|
||||||
|
|
||||||
ToUTF8::Utf8Encoder encoder(mEncoding);
|
ToUTF8::Utf8Encoder encoder(mEncoding);
|
||||||
unsigned long unicodeVal = utf8ToUnicode(getUtf8(i, encoder, mEncoding));
|
unsigned long unicodeVal = getUnicode(i, encoder, mEncoding);
|
||||||
|
|
||||||
MyGUI::xml::ElementPtr code = codes->createChild("Code");
|
MyGUI::xml::ElementPtr code = codes->createChild("Code");
|
||||||
code->addAttribute("index", unicodeVal);
|
code->addAttribute("index", unicodeVal);
|
||||||
|
|
|
@ -77,12 +77,15 @@ Utf8Encoder::Utf8Encoder(const FromType sourceEncoding):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Utf8Encoder::getUtf8(const char* input, size_t size)
|
std::string_view Utf8Encoder::getUtf8(std::string_view input)
|
||||||
{
|
{
|
||||||
|
if (input.empty())
|
||||||
|
return input;
|
||||||
|
|
||||||
// Double check that the input string stops at some point (it might
|
// Double check that the input string stops at some point (it might
|
||||||
// contain zero terminators before this, inside its own data, which
|
// contain zero terminators before this, inside its own data, which
|
||||||
// is also ok.)
|
// is also ok.)
|
||||||
assert(input[size] == 0);
|
assert(input[input.size()] == 0);
|
||||||
|
|
||||||
// Note: The rest of this function is designed for single-character
|
// Note: The rest of this function is designed for single-character
|
||||||
// input encodings only. It also assumes that the input encoding
|
// input encodings only. It also assumes that the input encoding
|
||||||
|
@ -93,19 +96,19 @@ std::string Utf8Encoder::getUtf8(const char* input, size_t size)
|
||||||
// Compute output length, and check for pure ascii input at the same
|
// Compute output length, and check for pure ascii input at the same
|
||||||
// time.
|
// time.
|
||||||
bool ascii;
|
bool ascii;
|
||||||
size_t outlen = getLength(input, ascii);
|
size_t outlen = getLength(input.data(), ascii);
|
||||||
|
|
||||||
// If we're pure ascii, then don't bother converting anything.
|
// If we're pure ascii, then don't bother converting anything.
|
||||||
if(ascii)
|
if(ascii)
|
||||||
return std::string(input, outlen);
|
return std::string_view(input.data(), outlen);
|
||||||
|
|
||||||
// Make sure the output is large enough
|
// Make sure the output is large enough
|
||||||
resize(outlen);
|
resize(outlen);
|
||||||
char *out = &mOutput[0];
|
char *out = &mOutput[0];
|
||||||
|
|
||||||
// Translate
|
// Translate
|
||||||
while (*input)
|
for (const char* ptr = input.data(); *ptr;)
|
||||||
copyFromArray(*(input++), out);
|
copyFromArray(*(ptr++), out);
|
||||||
|
|
||||||
// Make sure that we wrote the correct number of bytes
|
// Make sure that we wrote the correct number of bytes
|
||||||
assert((out-&mOutput[0]) == (int)outlen);
|
assert((out-&mOutput[0]) == (int)outlen);
|
||||||
|
@ -114,16 +117,18 @@ std::string Utf8Encoder::getUtf8(const char* input, size_t size)
|
||||||
assert(mOutput.size() > outlen);
|
assert(mOutput.size() > outlen);
|
||||||
assert(mOutput[outlen] == 0);
|
assert(mOutput[outlen] == 0);
|
||||||
|
|
||||||
// Return a string
|
return std::string_view(mOutput.data(), outlen);
|
||||||
return std::string(&mOutput[0], outlen);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string Utf8Encoder::getLegacyEnc(const char *input, size_t size)
|
std::string_view Utf8Encoder::getLegacyEnc(std::string_view input)
|
||||||
{
|
{
|
||||||
|
if (input.empty())
|
||||||
|
return input;
|
||||||
|
|
||||||
// Double check that the input string stops at some point (it might
|
// Double check that the input string stops at some point (it might
|
||||||
// contain zero terminators before this, inside its own data, which
|
// contain zero terminators before this, inside its own data, which
|
||||||
// is also ok.)
|
// is also ok.)
|
||||||
assert(input[size] == 0);
|
assert(input[input.size()] == 0);
|
||||||
|
|
||||||
// TODO: The rest of this function is designed for single-character
|
// TODO: The rest of this function is designed for single-character
|
||||||
// input encodings only. It also assumes that the input
|
// input encodings only. It also assumes that the input
|
||||||
|
@ -134,19 +139,19 @@ std::string Utf8Encoder::getLegacyEnc(const char *input, size_t size)
|
||||||
// Compute output length, and check for pure ascii input at the same
|
// Compute output length, and check for pure ascii input at the same
|
||||||
// time.
|
// time.
|
||||||
bool ascii;
|
bool ascii;
|
||||||
size_t outlen = getLength2(input, ascii);
|
size_t outlen = getLength2(input.data(), ascii);
|
||||||
|
|
||||||
// If we're pure ascii, then don't bother converting anything.
|
// If we're pure ascii, then don't bother converting anything.
|
||||||
if(ascii)
|
if(ascii)
|
||||||
return std::string(input, outlen);
|
return std::string_view(input.data(), outlen);
|
||||||
|
|
||||||
// Make sure the output is large enough
|
// Make sure the output is large enough
|
||||||
resize(outlen);
|
resize(outlen);
|
||||||
char *out = &mOutput[0];
|
char *out = &mOutput[0];
|
||||||
|
|
||||||
// Translate
|
// Translate
|
||||||
while(*input)
|
for (const char* ptr = input.data(); *ptr;)
|
||||||
copyFromArray2(input, out);
|
copyFromArray2(ptr, out);
|
||||||
|
|
||||||
// Make sure that we wrote the correct number of bytes
|
// Make sure that we wrote the correct number of bytes
|
||||||
assert((out-&mOutput[0]) == (int)outlen);
|
assert((out-&mOutput[0]) == (int)outlen);
|
||||||
|
@ -155,8 +160,7 @@ std::string Utf8Encoder::getLegacyEnc(const char *input, size_t size)
|
||||||
assert(mOutput.size() > outlen);
|
assert(mOutput.size() > outlen);
|
||||||
assert(mOutput[outlen] == 0);
|
assert(mOutput[outlen] == 0);
|
||||||
|
|
||||||
// Return a string
|
return std::string_view(mOutput.data(), outlen);
|
||||||
return std::string(&mOutput[0], outlen);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make sure the output vector is large enough for 'size' bytes,
|
// Make sure the output vector is large enough for 'size' bytes,
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <string_view>
|
||||||
|
|
||||||
namespace ToUTF8
|
namespace ToUTF8
|
||||||
{
|
{
|
||||||
|
@ -27,18 +28,14 @@ namespace ToUTF8
|
||||||
public:
|
public:
|
||||||
Utf8Encoder(FromType sourceEncoding);
|
Utf8Encoder(FromType sourceEncoding);
|
||||||
|
|
||||||
// Convert to UTF8 from the previously given code page.
|
/// Convert to UTF8 from the previously given code page.
|
||||||
std::string getUtf8(const char *input, size_t size);
|
/// Returns a view to an internal buffer, invalidated by the next getUtf8 or getLegacyEnc call, if input is not
|
||||||
inline std::string getUtf8(const std::string &str)
|
/// ASCII-only string. Otherwise returns a view to the input.
|
||||||
{
|
std::string_view getUtf8(std::string_view input);
|
||||||
return getUtf8(str.c_str(), str.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string getLegacyEnc(const char *input, size_t size);
|
/// Returns a view to an internal buffer, invalidated by the next getUtf8 or getLegacyEnc call, if input is not
|
||||||
inline std::string getLegacyEnc(const std::string &str)
|
/// ASCII-only string. Otherwise returns a view to the input.
|
||||||
{
|
std::string_view getLegacyEnc(std::string_view input);
|
||||||
return getLegacyEnc(str.c_str(), str.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void resize(size_t size);
|
void resize(size_t size);
|
||||||
|
|
Loading…
Reference in a new issue