mirror of
https://github.com/OpenMW/openmw.git
synced 2025-02-06 12:45:36 +00:00
Support UTF-8 by StringRefId::toDebugString
This commit is contained in:
parent
8e3e351015
commit
78b3f7288a
3 changed files with 67 additions and 16 deletions
|
@ -287,7 +287,11 @@ namespace ESM
|
||||||
{ RefId(), "Empty{}" },
|
{ RefId(), "Empty{}" },
|
||||||
{ RefId::stringRefId("foo"), "\"foo\"" },
|
{ RefId::stringRefId("foo"), "\"foo\"" },
|
||||||
{ RefId::stringRefId("BAR"), "\"BAR\"" },
|
{ RefId::stringRefId("BAR"), "\"BAR\"" },
|
||||||
{ RefId::stringRefId(std::string({ 'a', 0, -1, '\n', '\t' })), "\"a\\x0\\xFF\\xA\\x9\"" },
|
{ RefId::stringRefId(std::string({ 'a', 0, -1, '\n', '\t' })), "\"a\\x0\\xff\\xa\\x9\"" },
|
||||||
|
{ RefId::stringRefId("Логово дракона"), "\"Логово дракона\"" },
|
||||||
|
{ RefId::stringRefId("\xd0\x9b"), "\"Л\"" },
|
||||||
|
{ RefId::stringRefId("\xff\x9b"), "\"\\xff\\x9b\"" },
|
||||||
|
{ RefId::stringRefId("\xd0\xd0"), "\"\\xd0\\xd0\"" },
|
||||||
{ RefId::formIdRefId({ .mIndex = 42, .mContentFile = 0 }), "FormId:0x2a" },
|
{ RefId::formIdRefId({ .mIndex = 42, .mContentFile = 0 }), "FormId:0x2a" },
|
||||||
{ RefId::formIdRefId({ .mIndex = 0xffffff, .mContentFile = std::numeric_limits<std::int32_t>::min() }),
|
{ RefId::formIdRefId({ .mIndex = 0xffffff, .mContentFile = std::numeric_limits<std::int32_t>::min() }),
|
||||||
"FormId:0xff80000000ffffff" },
|
"FormId:0xff80000000ffffff" },
|
||||||
|
|
|
@ -1,13 +1,17 @@
|
||||||
#include "stringrefid.hpp"
|
#include "stringrefid.hpp"
|
||||||
|
#include "serializerefid.hpp"
|
||||||
|
|
||||||
|
#include <charconv>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <system_error>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
|
||||||
#include "components/misc/guarded.hpp"
|
#include "components/misc/guarded.hpp"
|
||||||
#include "components/misc/strings/algorithm.hpp"
|
#include "components/misc/strings/algorithm.hpp"
|
||||||
|
#include "components/misc/utf8stream.hpp"
|
||||||
|
|
||||||
namespace ESM
|
namespace ESM
|
||||||
{
|
{
|
||||||
|
@ -26,6 +30,18 @@ namespace ESM
|
||||||
it = locked->emplace(id).first;
|
it = locked->emplace(id).first;
|
||||||
return &*it;
|
return &*it;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends `value` to `result` as a backslash-x escape followed by its
// lowercase hexadecimal digits without zero padding (0x0a becomes "\xa").
//
// The digits are formatted into a local buffer first and only the characters
// std::to_chars actually produced are appended, so the output length never
// depends on a separately computed size, and `result` is left untouched if
// the conversion reports an error.
//
// Throws std::system_error if std::to_chars fails.
void addHex(unsigned char value, std::string& result)
{
    // An unsigned char needs at most two hexadecimal digits.
    char digits[2];
    const auto [last, ec] = std::to_chars(digits, digits + sizeof(digits), static_cast<unsigned>(value), 16);
    if (ec != std::errc())
        throw std::system_error(std::make_error_code(ec));
    result.append("\\x");
    result.append(digits, last);
}
|
||||||
}
|
}
|
||||||
|
|
||||||
StringRefId::StringRefId()
|
StringRefId::StringRefId()
|
||||||
|
@ -60,20 +76,43 @@ namespace ESM
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& stream, StringRefId value)
|
std::ostream& operator<<(std::ostream& stream, StringRefId value)
|
||||||
{
|
{
|
||||||
stream << '"';
|
return stream << value.toDebugString();
|
||||||
for (char c : *value.mValue)
|
|
||||||
if (std::isprint(c) && c != '\t' && c != '\n' && c != '\r')
|
|
||||||
stream << c;
|
|
||||||
else
|
|
||||||
stream << "\\x" << std::hex << std::uppercase << static_cast<unsigned>(static_cast<unsigned char>(c));
|
|
||||||
return stream << '"';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string StringRefId::toDebugString() const
|
std::string StringRefId::toDebugString() const
|
||||||
{
|
{
|
||||||
std::ostringstream stream;
|
std::string result;
|
||||||
stream << *this;
|
result.reserve(2 + mValue->size());
|
||||||
return stream.str();
|
result.push_back('"');
|
||||||
|
const unsigned char* ptr = reinterpret_cast<const unsigned char*>(mValue->data());
|
||||||
|
const unsigned char* const end = reinterpret_cast<const unsigned char*>(mValue->data() + mValue->size());
|
||||||
|
while (ptr != end)
|
||||||
|
{
|
||||||
|
if (Utf8Stream::isAscii(*ptr))
|
||||||
|
{
|
||||||
|
if (std::isprint(*ptr) && *ptr != '\t' && *ptr != '\n' && *ptr != '\r')
|
||||||
|
result.push_back(*ptr);
|
||||||
|
else
|
||||||
|
addHex(*ptr, result);
|
||||||
|
++ptr;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const auto [octets, first] = Utf8Stream::getOctetCount(*ptr);
|
||||||
|
const auto [chr, next] = Utf8Stream::decode(ptr + 1, end, first, octets);
|
||||||
|
if (chr == Utf8Stream::sBadChar())
|
||||||
|
{
|
||||||
|
while (ptr != std::min(end, ptr + octets))
|
||||||
|
{
|
||||||
|
addHex(*ptr, result);
|
||||||
|
++ptr;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
result.append(ptr, next);
|
||||||
|
ptr = next;
|
||||||
|
}
|
||||||
|
result.push_back('"');
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool StringRefId::startsWith(std::string_view prefix) const
|
bool StringRefId::startsWith(std::string_view prefix) const
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#ifndef MISC_UTF8ITER_HPP
|
#ifndef MISC_UTF8ITER_HPP
|
||||||
#define MISC_UTF8ITER_HPP
|
#define MISC_UTF8ITER_HPP
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
|
@ -63,9 +64,11 @@ public:
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// True when the byte's high bit is clear, i.e. it lies in the range 0x00..0x7F.
static bool isAscii(unsigned char value)
{
    return value < 0x80;
}
|
||||||
|
|
||||||
static std::pair<UnicodeChar, Point> decode(Point cur, Point end)
|
static std::pair<UnicodeChar, Point> decode(Point cur, Point end)
|
||||||
{
|
{
|
||||||
if ((*cur & 0x80) == 0)
|
if (isAscii(*cur))
|
||||||
{
|
{
|
||||||
UnicodeChar chr = *cur++;
|
UnicodeChar chr = *cur++;
|
||||||
|
|
||||||
|
@ -75,8 +78,13 @@ public:
|
||||||
int octets;
|
int octets;
|
||||||
UnicodeChar chr;
|
UnicodeChar chr;
|
||||||
|
|
||||||
std::tie(octets, chr) = octet_count(*cur++);
|
std::tie(octets, chr) = getOctetCount(*cur++);
|
||||||
|
|
||||||
|
return decode(cur, end, chr, octets);
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::pair<UnicodeChar, Point> decode(Point cur, Point end, UnicodeChar chr, std::size_t octets)
|
||||||
|
{
|
||||||
if (octets > 5)
|
if (octets > 5)
|
||||||
return std::make_pair(sBadChar(), cur);
|
return std::make_pair(sBadChar(), cur);
|
||||||
|
|
||||||
|
@ -161,10 +169,9 @@ public:
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
static std::pair<std::size_t, UnicodeChar> getOctetCount(unsigned char octet)
|
||||||
static std::pair<int, UnicodeChar> octet_count(unsigned char octet)
|
|
||||||
{
|
{
|
||||||
int octets;
|
std::size_t octets;
|
||||||
|
|
||||||
unsigned char mark = 0xC0;
|
unsigned char mark = 0xC0;
|
||||||
unsigned char mask = 0xE0;
|
unsigned char mask = 0xE0;
|
||||||
|
@ -181,6 +188,7 @@ private:
|
||||||
return std::make_pair(octets, octet & ~mask);
|
return std::make_pair(octets, octet & ~mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
void next() { std::tie(val, nxt) = decode(nxt, end); }
|
void next() { std::tie(val, nxt) = decode(nxt, end); }
|
||||||
|
|
||||||
Point cur;
|
Point cur;
|
||||||
|
|
Loading…
Reference in a new issue