|
|
@ -77,12 +77,15 @@ Utf8Encoder::Utf8Encoder(const FromType sourceEncoding):
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::string Utf8Encoder::getUtf8(const char* input, size_t size)
|
|
|
|
std::string Utf8Encoder::getUtf8(std::string_view input)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
|
|
|
|
if (input.empty())
|
|
|
|
|
|
|
|
return input;
|
|
|
|
|
|
|
|
|
|
|
|
// Double check that the input string stops at some point (it might
|
|
|
|
// Double check that the input string stops at some point (it might
|
|
|
|
// contain zero terminators before this, inside its own data, which
|
|
|
|
// contain zero terminators before this, inside its own data, which
|
|
|
|
// is also ok.)
|
|
|
|
// is also ok.)
|
|
|
|
assert(input[size] == 0);
|
|
|
|
assert(input[input.size()] == 0);
|
|
|
|
|
|
|
|
|
|
|
|
// Note: The rest of this function is designed for single-character
|
|
|
|
// Note: The rest of this function is designed for single-character
|
|
|
|
// input encodings only. It also assumes that the input encoding
|
|
|
|
// input encodings only. It also assumes that the input encoding
|
|
|
@ -93,19 +96,19 @@ std::string Utf8Encoder::getUtf8(const char* input, size_t size)
|
|
|
|
// Compute output length, and check for pure ascii input at the same
|
|
|
|
// Compute output length, and check for pure ascii input at the same
|
|
|
|
// time.
|
|
|
|
// time.
|
|
|
|
bool ascii;
|
|
|
|
bool ascii;
|
|
|
|
size_t outlen = getLength(input, ascii);
|
|
|
|
size_t outlen = getLength(input.data(), ascii);
|
|
|
|
|
|
|
|
|
|
|
|
// If we're pure ascii, then don't bother converting anything.
|
|
|
|
// If we're pure ascii, then don't bother converting anything.
|
|
|
|
if(ascii)
|
|
|
|
if(ascii)
|
|
|
|
return std::string(input, outlen);
|
|
|
|
return std::string(input.data(), outlen);
|
|
|
|
|
|
|
|
|
|
|
|
// Make sure the output is large enough
|
|
|
|
// Make sure the output is large enough
|
|
|
|
resize(outlen);
|
|
|
|
resize(outlen);
|
|
|
|
char *out = &mOutput[0];
|
|
|
|
char *out = &mOutput[0];
|
|
|
|
|
|
|
|
|
|
|
|
// Translate
|
|
|
|
// Translate
|
|
|
|
while (*input)
|
|
|
|
for (const char* ptr = input.data(); *ptr;)
|
|
|
|
copyFromArray(*(input++), out);
|
|
|
|
copyFromArray(*(ptr++), out);
|
|
|
|
|
|
|
|
|
|
|
|
// Make sure that we wrote the correct number of bytes
|
|
|
|
// Make sure that we wrote the correct number of bytes
|
|
|
|
assert((out-&mOutput[0]) == (int)outlen);
|
|
|
|
assert((out-&mOutput[0]) == (int)outlen);
|
|
|
@ -118,12 +121,15 @@ std::string Utf8Encoder::getUtf8(const char* input, size_t size)
|
|
|
|
return std::string(&mOutput[0], outlen);
|
|
|
|
return std::string(&mOutput[0], outlen);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::string Utf8Encoder::getLegacyEnc(const char *input, size_t size)
|
|
|
|
std::string Utf8Encoder::getLegacyEnc(std::string_view input)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
|
|
|
|
if (input.empty())
|
|
|
|
|
|
|
|
return input;
|
|
|
|
|
|
|
|
|
|
|
|
// Double check that the input string stops at some point (it might
|
|
|
|
// Double check that the input string stops at some point (it might
|
|
|
|
// contain zero terminators before this, inside its own data, which
|
|
|
|
// contain zero terminators before this, inside its own data, which
|
|
|
|
// is also ok.)
|
|
|
|
// is also ok.)
|
|
|
|
assert(input[size] == 0);
|
|
|
|
assert(input[input.size()] == 0);
|
|
|
|
|
|
|
|
|
|
|
|
// TODO: The rest of this function is designed for single-character
|
|
|
|
// TODO: The rest of this function is designed for single-character
|
|
|
|
// input encodings only. It also assumes that the input
|
|
|
|
// input encodings only. It also assumes that the input
|
|
|
@ -134,19 +140,19 @@ std::string Utf8Encoder::getLegacyEnc(const char *input, size_t size)
|
|
|
|
// Compute output length, and check for pure ascii input at the same
|
|
|
|
// Compute output length, and check for pure ascii input at the same
|
|
|
|
// time.
|
|
|
|
// time.
|
|
|
|
bool ascii;
|
|
|
|
bool ascii;
|
|
|
|
size_t outlen = getLength2(input, ascii);
|
|
|
|
size_t outlen = getLength2(input.data(), ascii);
|
|
|
|
|
|
|
|
|
|
|
|
// If we're pure ascii, then don't bother converting anything.
|
|
|
|
// If we're pure ascii, then don't bother converting anything.
|
|
|
|
if(ascii)
|
|
|
|
if(ascii)
|
|
|
|
return std::string(input, outlen);
|
|
|
|
return std::string(input.data(), outlen);
|
|
|
|
|
|
|
|
|
|
|
|
// Make sure the output is large enough
|
|
|
|
// Make sure the output is large enough
|
|
|
|
resize(outlen);
|
|
|
|
resize(outlen);
|
|
|
|
char *out = &mOutput[0];
|
|
|
|
char *out = &mOutput[0];
|
|
|
|
|
|
|
|
|
|
|
|
// Translate
|
|
|
|
// Translate
|
|
|
|
while(*input)
|
|
|
|
for (const char* ptr = input.data(); *ptr;)
|
|
|
|
copyFromArray2(input, out);
|
|
|
|
copyFromArray2(ptr, out);
|
|
|
|
|
|
|
|
|
|
|
|
// Make sure that we wrote the correct number of bytes
|
|
|
|
// Make sure that we wrote the correct number of bytes
|
|
|
|
assert((out-&mOutput[0]) == (int)outlen);
|
|
|
|
assert((out-&mOutput[0]) == (int)outlen);
|
|
|
|