Merge branch 'utfconversionfixsize' into 'master'

Use fixed-size type to hold a whole code point

See merge request OpenMW/openmw!4163
pull/3236/head
psi29a 7 months ago
commit 31102a2076

@ -247,6 +247,7 @@
Task #5896: Do not use deprecated MyGUI properties
Task #6085: Replace boost::filesystem with std::filesystem
Task #6149: Dehardcode Lua API_REVISION
Task #6505: UTF-8 support in Lua scripts
Task #6624: Drop support for saves made prior to 0.45
Task #7048: Get rid of std::bind
Task #7113: Move from std::atoi to std::from_chars

@ -6,8 +6,8 @@
namespace
{
constexpr std::string_view UTF8PATT = "[%z\x01-\x7F\xC2-\xF4][\x80-\xBF]*"; // %z is deprecated in Lua5.2
constexpr uint32_t MAXUTF = 0x7FFFFFFFu;
// constexpr uint32_t MAXUNICODE = 0x10FFFFu;
// constexpr uint32_t MAXUTF = 0x7FFFFFFFu;
constexpr uint32_t MAXUNICODE = 0x10FFFFu;
inline bool isNilOrNone(const sol::stack_proxy arg)
{
@ -96,16 +96,15 @@ namespace LuaUtf8
// Lua binding stored under utf8["char"]: walks the variadic arguments in order, reads each one
// as an integer via getInteger, range-checks it, and appends its converted bytes to the result.
// NOTE(review): this hunk shows both sides of the diff without +/- markers. The wchar_t / MAXUTF
// lines look like the removed side and the char32_t / MAXUNICODE lines like the added side
// (consistent with "Use fixed-size type to hold a whole code point") — confirm against the MR.
utf8["char"] = [](const sol::variadic_args args) -> std::string {
std::string result{};
// NOTE(review): two declarations of `converter` follow; presumably only the char32_t one remains.
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
for (size_t i = 0; i < args.size(); ++i)
{
// getInteger is defined outside this view; it receives the 1-based argument index and the
// Lua-facing function name.
int64_t codepoint = getInteger(args[i], (i + 1), "char");
// NOTE(review): two range checks follow, one per diff side; the upper bound differs
// (MAXUTF vs MAXUNICODE). Negative values are rejected by both.
if (codepoint < 0 || codepoint > MAXUTF)
if (codepoint < 0 || codepoint > MAXUNICODE)
throw std::runtime_error(
"bad argument #" + std::to_string(i + 1) + " to 'char' (value out of range)");
// this feels dodgy if wchar_t is 16-bit as MAXUTF won't fit in sixteen bits
result += converter.to_bytes(static_cast<wchar_t>(codepoint));
result += converter.to_bytes(static_cast<char32_t>(codepoint));
}
// Concatenation of the converted bytes of every argument, in argument order.
return result;
};

@ -1,4 +1,4 @@
local testing = require('testing_util')
local testing = require('testing_util')
local core = require('openmw.core')
local async = require('openmw.async')
local util = require('openmw.util')
@ -150,6 +150,42 @@ local function testRecordCreation()
testing.expectEqual(record[key],value)
end
end
-- Integration test for the `utf8` library available to Lua scripts.
-- First pass: encodes every code point from 0 to 0x10FFFF with utf8.char and
-- checks that each encoding is unique, has the expected byte length, and
-- decodes back to the same code point as a single character.
-- Second pass: checks charpattern, codepoint, len and offset on fixed samples.
local function testUTF8()
    local smiley = "😀"
    local sample = "Hello, 你好, 🌎!"

    -- Encodings produced so far, keyed by the encoded string, to catch collisions.
    local seen = {}
    for codepoint = 0, 0x10FFFF do
        local encoded = utf8.char(codepoint)
        local byteCount = #encoded

        testing.expect(not seen[encoded], nil, "Duplicate UTF-8 character: " .. encoded)
        seen[encoded] = true

        -- Number of bytes the encoding should occupy for this code point.
        local expectedBytes
        if codepoint > 0xFFFF then
            expectedBytes = 4
        elseif codepoint > 0x7FF then
            expectedBytes = 3
        elseif codepoint > 0x7F then
            expectedBytes = 2
        else
            expectedBytes = 1
        end
        testing.expectEqual(byteCount, expectedBytes)

        -- Round trip: the encoding decodes to the same code point and counts as one character.
        testing.expectEqual(utf8.codepoint(encoded), codepoint)
        testing.expectEqual(utf8.len(encoded), 1)
    end

    -- Splitting the sample with charpattern and joining the pieces must give back the sample.
    local pieces = {}
    for piece in sample:gmatch(utf8.charpattern) do
        pieces[#pieces + 1] = piece
    end
    testing.expectEqual(table.concat(pieces), sample)

    testing.expectEqual(utf8.codepoint(smiley), 128512)
    testing.expectEqual(utf8.len(sample), 13)
    testing.expectEqual(utf8.offset(sample, 9), 11)
end
local function initPlayer()
player:teleport('', util.vector3(4096, 4096, 867.237), util.transform.identity)
coroutine.yield()
@ -189,6 +225,7 @@ tests = {
{'getGMST', testGetGMST},
{'recordStores', testRecordStores},
{'recordCreation', testRecordCreation},
{'utf8', testUTF8},
{'mwscript', testMWScript},
}

Loading…
Cancel
Save