mirror of
https://github.com/OpenMW/openmw.git
synced 2025-03-27 12:40:25 +00:00
Merge branch 'utfconversionfixsize' into 'master'
Use fixed-size type to hold a whole code point. See merge request OpenMW/openmw!4163
This commit is contained in:
commit
31102a2076
3 changed files with 44 additions and 7 deletions
|
@ -247,6 +247,7 @@
|
||||||
Task #5896: Do not use deprecated MyGUI properties
|
Task #5896: Do not use deprecated MyGUI properties
|
||||||
Task #6085: Replace boost::filesystem with std::filesystem
|
Task #6085: Replace boost::filesystem with std::filesystem
|
||||||
Task #6149: Dehardcode Lua API_REVISION
|
Task #6149: Dehardcode Lua API_REVISION
|
||||||
|
Task #6505: UTF-8 support in Lua scripts
|
||||||
Task #6624: Drop support for saves made prior to 0.45
|
Task #6624: Drop support for saves made prior to 0.45
|
||||||
Task #7048: Get rid of std::bind
|
Task #7048: Get rid of std::bind
|
||||||
Task #7113: Move from std::atoi to std::from_chars
|
Task #7113: Move from std::atoi to std::from_chars
|
||||||
|
|
|
@ -6,8 +6,8 @@
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
constexpr std::string_view UTF8PATT = "[%z\x01-\x7F\xC2-\xF4][\x80-\xBF]*"; // %z is deprecated in Lua5.2
|
constexpr std::string_view UTF8PATT = "[%z\x01-\x7F\xC2-\xF4][\x80-\xBF]*"; // %z is deprecated in Lua5.2
|
||||||
constexpr uint32_t MAXUTF = 0x7FFFFFFFu;
|
// constexpr uint32_t MAXUTF = 0x7FFFFFFFu;
|
||||||
// constexpr uint32_t MAXUNICODE = 0x10FFFFu;
|
constexpr uint32_t MAXUNICODE = 0x10FFFFu;
|
||||||
|
|
||||||
inline bool isNilOrNone(const sol::stack_proxy arg)
|
inline bool isNilOrNone(const sol::stack_proxy arg)
|
||||||
{
|
{
|
||||||
|
@ -96,16 +96,15 @@ namespace LuaUtf8
|
||||||
|
|
||||||
utf8["char"] = [](const sol::variadic_args args) -> std::string {
|
utf8["char"] = [](const sol::variadic_args args) -> std::string {
|
||||||
std::string result{};
|
std::string result{};
|
||||||
std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
|
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
|
||||||
for (size_t i = 0; i < args.size(); ++i)
|
for (size_t i = 0; i < args.size(); ++i)
|
||||||
{
|
{
|
||||||
int64_t codepoint = getInteger(args[i], (i + 1), "char");
|
int64_t codepoint = getInteger(args[i], (i + 1), "char");
|
||||||
if (codepoint < 0 || codepoint > MAXUTF)
|
if (codepoint < 0 || codepoint > MAXUNICODE)
|
||||||
throw std::runtime_error(
|
throw std::runtime_error(
|
||||||
"bad argument #" + std::to_string(i + 1) + " to 'char' (value out of range)");
|
"bad argument #" + std::to_string(i + 1) + " to 'char' (value out of range)");
|
||||||
|
|
||||||
// this feels dodgy if wchar_t is 16-bit as MAXUTF won't fit in sixteen bits
|
result += converter.to_bytes(static_cast<char32_t>(codepoint));
|
||||||
result += converter.to_bytes(static_cast<wchar_t>(codepoint));
|
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
local testing = require('testing_util')
|
local testing = require('testing_util')
|
||||||
local core = require('openmw.core')
|
local core = require('openmw.core')
|
||||||
local async = require('openmw.async')
|
local async = require('openmw.async')
|
||||||
local util = require('openmw.util')
|
local util = require('openmw.util')
|
||||||
|
@ -150,6 +150,42 @@ local function testRecordCreation()
|
||||||
testing.expectEqual(record[key],value)
|
testing.expectEqual(record[key],value)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
--- Integration test for the `utf8` library exposed to scripts.
-- Encodes every code point from 0 through 0x10FFFF with `utf8.char`, checks
-- that each encoding is unique and has the expected byte length, and that
-- `utf8.codepoint` / `utf8.len` agree with it. Afterwards it checks
-- `utf8.charpattern`, `utf8.codepoint`, `utf8.len` and `utf8.offset` against
-- a fixed mixed-script sample string.
local function testUTF8()
    local smiley = "😀"
    local sample = "Hello, 你好, 🌎!"
    local seen = {}

    -- Byte length of the UTF-8 encoding for a code point in 0..0x10FFFF.
    local function expectedByteCount(cp)
        if cp > 0xFFFF then
            return 4
        elseif cp > 0x7FF then
            return 3
        elseif cp > 0x7F then
            return 2
        end
        return 1
    end

    for cp = 0, 0x10FFFF do
        local encoded = utf8.char(cp)
        local byteCount = #encoded

        -- Every code point must map to a distinct byte sequence.
        testing.expect(not seen[encoded], nil, "Duplicate UTF-8 character: " .. encoded)
        seen[encoded] = true

        testing.expectEqual(byteCount, expectedByteCount(cp))

        -- Decoding must give back the same code point, counted as one character.
        testing.expectEqual(utf8.codepoint(encoded), cp)
        testing.expectEqual(utf8.len(encoded), 1)
    end

    -- Splitting the sample with utf8.charpattern and joining the pieces
    -- again must reproduce the sample unchanged.
    local pieces = {}
    for piece in string.gmatch(sample, utf8.charpattern) do
        pieces[#pieces + 1] = piece
    end
    testing.expectEqual(table.concat(pieces), sample)

    testing.expectEqual(utf8.codepoint(smiley), 128512)
    testing.expectEqual(utf8.len(sample), 13)
    testing.expectEqual(utf8.offset(sample, 9), 11)
end
|
||||||
local function initPlayer()
|
local function initPlayer()
|
||||||
player:teleport('', util.vector3(4096, 4096, 867.237), util.transform.identity)
|
player:teleport('', util.vector3(4096, 4096, 867.237), util.transform.identity)
|
||||||
coroutine.yield()
|
coroutine.yield()
|
||||||
|
@ -189,6 +225,7 @@ tests = {
|
||||||
{'getGMST', testGetGMST},
|
{'getGMST', testGetGMST},
|
||||||
{'recordStores', testRecordStores},
|
{'recordStores', testRecordStores},
|
||||||
{'recordCreation', testRecordCreation},
|
{'recordCreation', testRecordCreation},
|
||||||
|
{'utf8', testUTF8},
|
||||||
{'mwscript', testMWScript},
|
{'mwscript', testMWScript},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue