mirror of https://github.com/OpenMW/openmw.git
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
167 lines
5.9 KiB
C++
167 lines
5.9 KiB
C++
2 years ago
|
#include <components/misc/strings/conversion.hpp>
|
||
2 years ago
|
#include <components/to_utf8/to_utf8.hpp>
|
||
3 years ago
|
|
||
|
#include <gtest/gtest.h>
|
||
|
|
||
2 years ago
|
#include <filesystem>
|
||
2 years ago
|
#include <fstream>
|
||
3 years ago
|
|
||
2 years ago
|
// Fallback used when no definition of the project source directory is already
// visible: resolve fixture paths relative to the current working directory.
#if !defined(OPENMW_PROJECT_SOURCE_DIR)
#define OPENMW_PROJECT_SOURCE_DIR "."
#endif
|
||
|
|
||
|
namespace
|
||
|
{
|
||
|
using namespace testing;
|
||
|
using namespace ToUTF8;
|
||
|
|
||
|
struct Params
|
||
|
{
|
||
|
FromType mLegacyEncoding;
|
||
|
std::string mLegacyEncodingFileName;
|
||
|
std::string mUtf8FileName;
|
||
|
};
|
||
|
|
||
|
std::string readContent(const std::string& fileName)
|
||
|
{
|
||
|
std::ifstream file;
|
||
|
file.exceptions(std::ios::failbit | std::ios::badbit);
|
||
7 months ago
|
file.open(std::filesystem::path{ OPENMW_PROJECT_SOURCE_DIR } / "apps" / "components_tests" / "toutf8" / "data"
|
||
2 years ago
|
/ Misc::StringUtils::stringToU8String(fileName));
|
||
3 years ago
|
std::stringstream buffer;
|
||
|
buffer << file.rdbuf();
|
||
|
return buffer.str();
|
||
|
}
|
||
|
|
||
2 years ago
|
// Fixture for the TEST_P cases below; carries no state of its own, it only
// binds the parameter type to Params.
struct Utf8EncoderTest : public TestWithParam<Params>
{
};
|
||
3 years ago
|
|
||
|
// Converting an empty view to UTF-8 must yield an empty view.
TEST(Utf8EncoderTest, getUtf8ShouldReturnEmptyAsIs)
{
    const std::string_view empty;
    Utf8Encoder encoder(FromType::CP437);
    EXPECT_EQ(encoder.getUtf8(empty), empty);
}
|
||
|
|
||
|
// Input made only of ASCII bytes (1..127) is expected to come back untouched:
// the result must view the caller's own storage (same pointer, same length).
TEST(Utf8EncoderTest, getUtf8ShouldReturnAsciiOnlyAsIs)
{
    // Use the 7-bit maximum explicitly. std::numeric_limits<char>::max() is 255
    // on platforms where plain char is unsigned, which would put non-ASCII
    // bytes into an input that is meant to be ASCII only.
    constexpr int lastAscii = std::numeric_limits<signed char>::max();

    std::string input;
    for (int c = 1; c <= lastAscii; ++c)
        input.push_back(static_cast<char>(c));

    Utf8Encoder encoder(FromType::CP437);
    const std::string_view result = encoder.getUtf8(input);
    EXPECT_EQ(result.data(), input.data());
    EXPECT_EQ(result.size(), input.size());
}
|
||
|
|
||
|
// Input containing an embedded zero byte: only the part before the zero is
// expected in the result.
TEST(Utf8EncoderTest, getUtf8ShouldLookUpUntilZero)
{
    // The length is passed explicitly: std::string(const char*) stops at the
    // first NUL, which would silently reduce the input to "a" and leave the
    // embedded zero untested.
    const std::string input("a\0b", 3);
    ASSERT_EQ(input.size(), 3u);

    Utf8Encoder encoder(FromType::CP437);
    const std::string_view result = encoder.getUtf8(input);
    EXPECT_EQ(result, "a");
}
|
||
|
|
||
|
// Only the bytes inside the passed view may be consumed: the view covers the
// first two characters of "abc", so the trailing 'c' must not appear.
TEST(Utf8EncoderTest, getUtf8ShouldLookUpUntilEndOfInputForAscii)
{
    const std::string storage("abc");
    const std::string_view firstTwo = std::string_view(storage).substr(0, 2);

    Utf8Encoder encoder(FromType::CP437);
    const std::string_view converted = encoder.getUtf8(firstTwo);
    EXPECT_EQ(converted, "ab");
}
|
||
|
|
||
|
// Same bound check as the ASCII variant, but the second byte (0x92) is
// non-ASCII and has to be converted; the 'b' beyond the view must be ignored.
TEST(Utf8EncoderTest, getUtf8ShouldLookUpUntilEndOfInputForNonAscii)
{
    // The literal is split so that 'b' is not parsed as part of the \x escape.
    const std::string storage(
        "a\x92"
        "b");
    const std::string_view firstTwo = std::string_view(storage).substr(0, 2);

    Utf8Encoder encoder(FromType::WINDOWS_1252);
    const std::string_view converted = encoder.getUtf8(firstTwo);
    EXPECT_EQ(converted, "a\xE2\x80\x99");
}
|
||
|
|
||
|
// For each parameter set: converting the legacy-encoded fixture must produce
// exactly the content of the matching UTF-8 fixture.
TEST_P(Utf8EncoderTest, getUtf8ShouldConvertFromLegacyEncodingToUtf8)
{
    const Params& params = GetParam();
    const std::string legacyText = readContent(params.mLegacyEncodingFileName);
    const std::string utf8Text = readContent(params.mUtf8FileName);

    Utf8Encoder encoder(params.mLegacyEncoding);
    const std::string_view converted = encoder.getUtf8(legacyText);
    EXPECT_EQ(converted, utf8Text);
}
|
||
|
|
||
|
// Converting an empty view to the legacy encoding must yield an empty view.
TEST(Utf8EncoderTest, getLegacyEncShouldReturnEmptyAsIs)
{
    const std::string_view empty;
    Utf8Encoder encoder(FromType::CP437);
    EXPECT_EQ(encoder.getLegacyEnc(empty), empty);
}
|
||
|
|
||
|
// Input made only of ASCII bytes (1..127) is expected to come back untouched:
// the result must view the caller's own storage (same pointer, same length).
TEST(Utf8EncoderTest, getLegacyEncShouldReturnAsciiOnlyAsIs)
{
    // Use the 7-bit maximum explicitly. std::numeric_limits<char>::max() is 255
    // on platforms where plain char is unsigned, which would put non-ASCII
    // bytes into an input that is meant to be ASCII only.
    constexpr int lastAscii = std::numeric_limits<signed char>::max();

    std::string input;
    for (int c = 1; c <= lastAscii; ++c)
        input.push_back(static_cast<char>(c));

    Utf8Encoder encoder(FromType::CP437);
    const std::string_view result = encoder.getLegacyEnc(input);
    EXPECT_EQ(result.data(), input.data());
    EXPECT_EQ(result.size(), input.size());
}
|
||
|
|
||
|
// Input containing an embedded zero byte: only the part before the zero is
// expected in the result.
TEST(Utf8EncoderTest, getLegacyEncShouldLookUpUntilZero)
{
    // The length is passed explicitly: std::string(const char*) stops at the
    // first NUL, which would silently reduce the input to "a" and leave the
    // embedded zero untested.
    const std::string input("a\0b", 3);
    ASSERT_EQ(input.size(), 3u);

    Utf8Encoder encoder(FromType::CP437);
    const std::string_view result = encoder.getLegacyEnc(input);
    EXPECT_EQ(result, "a");
}
|
||
|
|
||
|
// Only the bytes inside the passed view may be consumed: the view covers the
// first two characters of "abc", so the trailing 'c' must not appear.
TEST(Utf8EncoderTest, getLegacyEncShouldLookUpUntilEndOfInputForAscii)
{
    const std::string storage("abc");
    const std::string_view firstTwo = std::string_view(storage).substr(0, 2);

    Utf8Encoder encoder(FromType::CP437);
    const std::string_view converted = encoder.getLegacyEnc(firstTwo);
    EXPECT_EQ(converted, "ab");
}
|
||
|
|
||
|
// The storage holds 'a', a two-byte sequence and a three-byte sequence. The
// view stops after five bytes, i.e. in the middle of the last sequence, and
// the expected output contains nothing for that truncated tail.
TEST(Utf8EncoderTest, getLegacyEncShouldStripIncompleteCharacters)
{
    const std::string storage("a\xc3\xa2\xe2\x80\x99");
    const std::string_view truncated = std::string_view(storage).substr(0, 5);

    Utf8Encoder encoder(FromType::WINDOWS_1252);
    const std::string_view converted = encoder.getLegacyEnc(truncated);
    EXPECT_EQ(converted, "a\xe2");
}
|
||
|
|
||
|
// For each parameter set: converting the UTF-8 fixture must produce exactly
// the content of the matching legacy-encoded fixture.
TEST_P(Utf8EncoderTest, getLegacyEncShouldConvertFromUtf8ToLegacyEncoding)
{
    const Params& params = GetParam();
    const std::string utf8Text = readContent(params.mUtf8FileName);
    const std::string legacyText = readContent(params.mLegacyEncodingFileName);

    Utf8Encoder encoder(params.mLegacyEncoding);
    const std::string_view converted = encoder.getLegacyEnc(utf8Text);
    EXPECT_EQ(converted, legacyText);
}
|
||
|
|
||
2 years ago
|
INSTANTIATE_TEST_SUITE_P(Files, Utf8EncoderTest,
|
||
|
Values(Params{ ToUTF8::WINDOWS_1251, "russian-win1251.txt", "russian-utf8.txt" },
|
||
|
Params{ ToUTF8::WINDOWS_1252, "french-win1252.txt", "french-utf8.txt" }));
|
||
3 years ago
|
|
||
|
// Reuses one output buffer for a longer and then a shorter conversion and
// checks that the byte right after the shorter result is a zero, i.e. that no
// leftover from the first conversion directly follows it.
TEST(StatelessUtf8EncoderTest, shouldCleanupBuffer)
{
    StatelessUtf8Encoder encoder(FromType::WINDOWS_1252);
    std::string buffer;

    // First conversion: fills the buffer with the longer text.
    encoder.getUtf8(std::string_view("long string\x92"), BufferAllocationPolicy::UseGrowFactor, buffer);

    const std::string shortString("short\x92");
    ASSERT_GT(buffer.size(), shortString.size());

    // Second conversion into the same buffer.
    const std::string_view shortUtf8 = encoder.getUtf8(shortString, BufferAllocationPolicy::UseGrowFactor, buffer);
    ASSERT_GE(buffer.size(), shortUtf8.size());

    const std::size_t firstByteAfterResult = shortUtf8.size();
    EXPECT_EQ(buffer[firstByteAfterResult], '\0') << buffer;
}
|
||
|
|
||
|
// With BufferAllocationPolicy::FitToRequiredSize the output buffer must end up
// exactly as large as the returned UTF-8 view.
TEST(StatelessUtf8EncoderTest, withFitToRequiredSizeShouldResizeBuffer)
{
    StatelessUtf8Encoder encoder(FromType::WINDOWS_1252);
    std::string buffer;

    const std::string_view source("long string\x92");
    const std::string_view utf8 = encoder.getUtf8(source, BufferAllocationPolicy::FitToRequiredSize, buffer);

    EXPECT_EQ(buffer.size(), utf8.size());
}
|
||
3 years ago
|
}
|