From 22d685eca387e79e0b27ddd22e923df70e337052 Mon Sep 17 00:00:00 2001 From: Kindi Date: Sat, 4 May 2024 19:56:30 +0800 Subject: [PATCH 1/2] ensure fitness --- CHANGELOG.md | 1 + components/lua/utf8.cpp | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a0fcf8d0b..69fe34eaca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -246,6 +246,7 @@ Task #5896: Do not use deprecated MyGUI properties Task #6085: Replace boost::filesystem with std::filesystem Task #6149: Dehardcode Lua API_REVISION + Task #6505: UTF-8 support in Lua scripts Task #6624: Drop support for saves made prior to 0.45 Task #7048: Get rid of std::bind Task #7113: Move from std::atoi to std::from_char diff --git a/components/lua/utf8.cpp b/components/lua/utf8.cpp index 2a585dac2d..7bc8d345a7 100644 --- a/components/lua/utf8.cpp +++ b/components/lua/utf8.cpp @@ -96,7 +96,7 @@ namespace LuaUtf8 utf8["char"] = [](const sol::variadic_args args) -> std::string { std::string result{}; - std::wstring_convert<std::codecvt_utf8<wchar_t>> converter; + std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter; for (size_t i = 0; i < args.size(); ++i) { int64_t codepoint = getInteger(args[i], (i + 1), "char"); @@ -104,8 +104,7 @@ namespace LuaUtf8 throw std::runtime_error( "bad argument #" + std::to_string(i + 1) + " to 'char' (value out of range)"); - // this feels dodgy if wchar_t is 16-bit as MAXUTF won't fit in sixteen bits - result += converter.to_bytes(static_cast<wchar_t>(codepoint)); + result += converter.to_bytes(static_cast<char32_t>(codepoint)); } return result; }; From d21f3809bd7d61994a8a8bd06b64b3e44ddbbd9d Mon Sep 17 00:00:00 2001 From: Kindi Date: Sun, 9 Jun 2024 03:13:27 +0800 Subject: [PATCH 2/2] test utf8 --- components/lua/utf8.cpp | 6 +-- .../integration_tests/test_lua_api/test.lua | 39 ++++++++++++++++++- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/components/lua/utf8.cpp b/components/lua/utf8.cpp index 7bc8d345a7..37f3984b14 100644 --- a/components/lua/utf8.cpp +++ 
b/components/lua/utf8.cpp @@ -6,8 +6,8 @@ namespace { constexpr std::string_view UTF8PATT = "[%z\x01-\x7F\xC2-\xF4][\x80-\xBF]*"; // %z is deprecated in Lua5.2 - constexpr uint32_t MAXUTF = 0x7FFFFFFFu; - // constexpr uint32_t MAXUNICODE = 0x10FFFFu; + // constexpr uint32_t MAXUTF = 0x7FFFFFFFu; + constexpr uint32_t MAXUNICODE = 0x10FFFFu; inline bool isNilOrNone(const sol::stack_proxy arg) { @@ -100,7 +100,7 @@ namespace LuaUtf8 for (size_t i = 0; i < args.size(); ++i) { int64_t codepoint = getInteger(args[i], (i + 1), "char"); - if (codepoint < 0 || codepoint > MAXUTF) + if (codepoint < 0 || codepoint > MAXUNICODE) throw std::runtime_error( "bad argument #" + std::to_string(i + 1) + " to 'char' (value out of range)"); diff --git a/scripts/data/integration_tests/test_lua_api/test.lua b/scripts/data/integration_tests/test_lua_api/test.lua index 22b8c4c0c1..24d19601f8 100644 --- a/scripts/data/integration_tests/test_lua_api/test.lua +++ b/scripts/data/integration_tests/test_lua_api/test.lua @@ -1,4 +1,4 @@ -local testing = require('testing_util') +local testing = require('testing_util') local core = require('openmw.core') local async = require('openmw.async') local util = require('openmw.util') @@ -150,6 +150,42 @@ local function testRecordCreation() testing.expectEqual(record[key],value) end end +local function testUTF8() + local utf8char = "😀" + local utf8str = "Hello, 你好, 🌎!" + local chars = {} + + for codepoint = 0, 0x10FFFF do + local char = utf8.char(codepoint) + local charSize = string.len(char) + + testing.expect(not chars[char], nil, "Duplicate UTF-8 character: " .. 
char) + chars[char] = true + + if codepoint <= 0x7F then + testing.expectEqual(charSize, 1) + elseif codepoint <= 0x7FF then + testing.expectEqual(charSize, 2) + elseif codepoint <= 0xFFFF then + testing.expectEqual(charSize, 3) + elseif codepoint <= 0x10FFFF then + testing.expectEqual(charSize, 4) + end + + testing.expectEqual(utf8.codepoint(char), codepoint) + testing.expectEqual(utf8.len(char), 1) + end + + local str = "" + for utf_char in utf8str:gmatch(utf8.charpattern) do + str = str .. utf_char + end + testing.expectEqual(str, utf8str) + + testing.expectEqual(utf8.codepoint(utf8char), 128512) + testing.expectEqual(utf8.len(utf8str), 13) + testing.expectEqual(utf8.offset(utf8str, 9), 11) +end local function initPlayer() player:teleport('', util.vector3(4096, 4096, 867.237), util.transform.identity) coroutine.yield() @@ -189,6 +225,7 @@ tests = { {'getGMST', testGetGMST}, {'recordStores', testRecordStores}, {'recordCreation', testRecordCreation}, + {'utf8', testUTF8}, {'mwscript', testMWScript}, }