mirror of
https://github.com/OpenMW/openmw.git
synced 2025-02-06 06:45:35 +00:00
Merge branch 'utfconversionfixsize' into 'master'
Use fixed-size type to hold a whole code point. See merge request OpenMW/openmw!4163.
This commit is contained in:
commit
31102a2076
3 changed files with 44 additions and 7 deletions
|
@ -247,6 +247,7 @@
|
|||
Task #5896: Do not use deprecated MyGUI properties
|
||||
Task #6085: Replace boost::filesystem with std::filesystem
|
||||
Task #6149: Dehardcode Lua API_REVISION
|
||||
Task #6505: UTF-8 support in Lua scripts
|
||||
Task #6624: Drop support for saves made prior to 0.45
|
||||
Task #7048: Get rid of std::bind
|
||||
Task #7113: Move from std::atoi to std::from_chars
|
||||
|
|
|
@ -6,8 +6,8 @@
|
|||
namespace
|
||||
{
|
||||
constexpr std::string_view UTF8PATT = "[%z\x01-\x7F\xC2-\xF4][\x80-\xBF]*"; // %z is deprecated in Lua5.2
|
||||
constexpr uint32_t MAXUTF = 0x7FFFFFFFu;
|
||||
// constexpr uint32_t MAXUNICODE = 0x10FFFFu;
|
||||
// constexpr uint32_t MAXUTF = 0x7FFFFFFFu;
|
||||
constexpr uint32_t MAXUNICODE = 0x10FFFFu;
|
||||
|
||||
inline bool isNilOrNone(const sol::stack_proxy arg)
|
||||
{
|
||||
|
@ -96,16 +96,15 @@ namespace LuaUtf8
|
|||
|
||||
// utf8.char(...): returns the concatenated UTF-8 encoding of every integer argument.
// Throws std::runtime_error ("bad argument #N to 'char' (value out of range)") when
// an argument is negative or greater than MAXUNICODE.
utf8["char"] = [](const sol::variadic_args args) -> std::string {
    std::string result{};
    // char32_t is a fixed-size type, so a whole code point fits regardless of how
    // wide wchar_t is on the target platform.
    std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
    for (size_t i = 0; i < args.size(); ++i)
    {
        // Lua argument positions are 1-based, hence (i + 1).
        int64_t codepoint = getInteger(args[i], (i + 1), "char");
        if (codepoint < 0 || codepoint > MAXUNICODE)
            throw std::runtime_error(
                "bad argument #" + std::to_string(i + 1) + " to 'char' (value out of range)");

        result += converter.to_bytes(static_cast<char32_t>(codepoint));
    }
    return result;
};
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
local testing = require('testing_util')
|
||||
local testing = require('testing_util')
|
||||
local core = require('openmw.core')
|
||||
local async = require('openmw.async')
|
||||
local util = require('openmw.util')
|
||||
|
@ -150,6 +150,42 @@ local function testRecordCreation()
|
|||
testing.expectEqual(record[key],value)
|
||||
end
|
||||
end
|
||||
-- Exercises the utf8 library: encodes every code point from 0 to 0x10FFFF with
-- utf8.char and checks the result, then checks charpattern, codepoint, len and
-- offset against fixed sample strings.
local function testUTF8()
    local emoji = "😀"
    local sample = "Hello, 你好, 🌎!"
    local seen = {}

    -- NOTE(review): this range also covers the surrogate code points
    -- (0xD800-0xDFFF); confirm utf8.char accepts them on every platform.
    for codepoint = 0, 0x10FFFF do
        local encoded = utf8.char(codepoint)
        local byteLen = #encoded

        -- Each code point must produce a byte sequence not seen before.
        testing.expect(not seen[encoded], nil, "Duplicate UTF-8 character: " .. encoded)
        seen[encoded] = true

        -- Number of bytes the encoding is expected to occupy.
        local expectedLen
        if codepoint <= 0x7F then
            expectedLen = 1
        elseif codepoint <= 0x7FF then
            expectedLen = 2
        elseif codepoint <= 0xFFFF then
            expectedLen = 3
        else
            -- The loop bound caps codepoint at 0x10FFFF.
            expectedLen = 4
        end
        testing.expectEqual(byteLen, expectedLen)

        -- Decoding must give back the original code point as a single character.
        testing.expectEqual(utf8.codepoint(encoded), codepoint)
        testing.expectEqual(utf8.len(encoded), 1)
    end

    -- Splitting with utf8.charpattern and re-joining must reproduce the input.
    local pieces = {}
    for piece in sample:gmatch(utf8.charpattern) do
        pieces[#pieces + 1] = piece
    end
    testing.expectEqual(table.concat(pieces), sample)

    testing.expectEqual(utf8.codepoint(emoji), 0x1F600)
    testing.expectEqual(utf8.len(sample), 13)
    -- The 9th character of the sample starts at byte 11.
    testing.expectEqual(utf8.offset(sample, 9), 11)
end
|
||||
local function initPlayer()
|
||||
player:teleport('', util.vector3(4096, 4096, 867.237), util.transform.identity)
|
||||
coroutine.yield()
|
||||
|
@ -189,6 +225,7 @@ tests = {
|
|||
{'getGMST', testGetGMST},
|
||||
{'recordStores', testRecordStores},
|
||||
{'recordCreation', testRecordCreation},
|
||||
{'utf8', testUTF8},
|
||||
{'mwscript', testMWScript},
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue