From d21f3809bd7d61994a8a8bd06b64b3e44ddbbd9d Mon Sep 17 00:00:00 2001 From: Kindi Date: Sun, 9 Jun 2024 03:13:27 +0800 Subject: [PATCH] Limit utf8.char to the Unicode range (<= 0x10FFFF) and add utf8 integration test --- components/lua/utf8.cpp | 6 +-- .../integration_tests/test_lua_api/test.lua | 39 ++++++++++++++++++- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/components/lua/utf8.cpp b/components/lua/utf8.cpp index 7bc8d345a7..37f3984b14 100644 --- a/components/lua/utf8.cpp +++ b/components/lua/utf8.cpp @@ -6,8 +6,8 @@ namespace { constexpr std::string_view UTF8PATT = "[%z\x01-\x7F\xC2-\xF4][\x80-\xBF]*"; // %z is deprecated in Lua5.2 - constexpr uint32_t MAXUTF = 0x7FFFFFFFu; - // constexpr uint32_t MAXUNICODE = 0x10FFFFu; + // constexpr uint32_t MAXUTF = 0x7FFFFFFFu; + constexpr uint32_t MAXUNICODE = 0x10FFFFu; inline bool isNilOrNone(const sol::stack_proxy arg) { @@ -100,7 +100,7 @@ namespace LuaUtf8 for (size_t i = 0; i < args.size(); ++i) { int64_t codepoint = getInteger(args[i], (i + 1), "char"); - if (codepoint < 0 || codepoint > MAXUTF) + if (codepoint < 0 || codepoint > MAXUNICODE) throw std::runtime_error( "bad argument #" + std::to_string(i + 1) + " to 'char' (value out of range)"); diff --git a/scripts/data/integration_tests/test_lua_api/test.lua b/scripts/data/integration_tests/test_lua_api/test.lua index 22b8c4c0c1..24d19601f8 100644 --- a/scripts/data/integration_tests/test_lua_api/test.lua +++ b/scripts/data/integration_tests/test_lua_api/test.lua @@ -1,4 +1,4 @@ -local testing = require('testing_util') +local testing = require('testing_util') local core = require('openmw.core') local async = require('openmw.async') local util = require('openmw.util') @@ -150,6 +150,42 @@ local function testRecordCreation() testing.expectEqual(record[key],value) end end +local function testUTF8() + local utf8char = "😀" + local utf8str = "Hello, 你好, 🌎!" 
+ local chars = {} + + for codepoint = 0, 0x10FFFF do + local char = utf8.char(codepoint) + local charSize = string.len(char) + + testing.expect(not chars[char], nil, "Duplicate UTF-8 character: " .. char) + chars[char] = true + + if codepoint <= 0x7F then + testing.expectEqual(charSize, 1) + elseif codepoint <= 0x7FF then + testing.expectEqual(charSize, 2) + elseif codepoint <= 0xFFFF then + testing.expectEqual(charSize, 3) + elseif codepoint <= 0x10FFFF then + testing.expectEqual(charSize, 4) + end + + testing.expectEqual(utf8.codepoint(char), codepoint) + testing.expectEqual(utf8.len(char), 1) + end + + local str = "" + for utf_char in utf8str:gmatch(utf8.charpattern) do + str = str .. utf_char + end + testing.expectEqual(str, utf8str) + + testing.expectEqual(utf8.codepoint(utf8char), 128512) + testing.expectEqual(utf8.len(utf8str), 13) + testing.expectEqual(utf8.offset(utf8str, 9), 11) +end local function initPlayer() player:teleport('', util.vector3(4096, 4096, 867.237), util.transform.identity) coroutine.yield() @@ -189,6 +225,7 @@ tests = { {'getGMST', testGetGMST}, {'recordStores', testRecordStores}, {'recordCreation', testRecordCreation}, + {'utf8', testUTF8}, {'mwscript', testMWScript}, }