mirror of https://github.com/OpenMW/openmw.git
Add tests for Utf8Encoder
parent
d8127fdad2
commit
7884a01026
@ -0,0 +1,139 @@
|
|||||||
|
#include <components/to_utf8/to_utf8.hpp>

#include <gtest/gtest.h>

#include <fstream>
#include <limits>
#include <sstream>
#include <string>
#include <string_view>
|
||||||
|
|
||||||
|
// Fall back to an empty directory prefix when the macro is not already defined.
#if !defined(OPENMW_TEST_SUITE_SOURCE_DIR)
#define OPENMW_TEST_SUITE_SOURCE_DIR ""
#endif
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
using namespace testing;
|
||||||
|
using namespace ToUTF8;
|
||||||
|
|
||||||
|
struct Params
|
||||||
|
{
|
||||||
|
FromType mLegacyEncoding;
|
||||||
|
std::string mLegacyEncodingFileName;
|
||||||
|
std::string mUtf8FileName;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::string readContent(const std::string& fileName)
|
||||||
|
{
|
||||||
|
std::ifstream file;
|
||||||
|
file.exceptions(std::ios::failbit | std::ios::badbit);
|
||||||
|
file.open(std::string(OPENMW_TEST_SUITE_SOURCE_DIR) + "/toutf8/data/" + fileName);
|
||||||
|
std::stringstream buffer;
|
||||||
|
buffer << file.rdbuf();
|
||||||
|
return buffer.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fixture of the value-parameterized tests; each instance receives one Params value.
struct Utf8EncoderTest : TestWithParam<Params>
{
};
|
||||||
|
|
||||||
|
/// Converting an empty input to UTF-8 must give an empty result.
TEST(Utf8EncoderTest, getUtf8ShouldReturnEmptyAsIs)
{
    const std::string_view emptyInput;
    Utf8Encoder encoder(FromType::CP437);
    EXPECT_EQ(encoder.getUtf8(emptyInput), std::string_view());
}
|
||||||
|
|
||||||
|
/// An input made only of the bytes 1..127 must come back as a view over the
/// very same buffer: same data pointer, same size.
TEST(Utf8EncoderTest, getUtf8ShouldReturnAsciiOnlyAsIs)
{
    // The bound is a fixed 0x7F instead of std::numeric_limits<char>::max():
    // where char is unsigned the latter is 255 and the input would no longer
    // be ASCII only.
    constexpr int lastAscii = 0x7F;
    std::string input;
    for (int c = 1; c <= lastAscii; ++c)
        input.push_back(static_cast<char>(c));
    Utf8Encoder encoder(FromType::CP437);
    const std::string_view result = encoder.getUtf8(input);
    EXPECT_EQ(result.data(), input.data());
    EXPECT_EQ(result.size(), input.size());
}
|
||||||
|
|
||||||
|
/// Conversion to UTF-8 is expected to stop at the first zero byte of the input.
TEST(Utf8EncoderTest, getUtf8ShouldLookUpUntilZero)
{
    // The length is passed explicitly: the std::string(const char*) constructor
    // stops at the embedded '\0' and would produce just "a", which makes the
    // test pass without ever exercising the zero byte handling.
    const std::string input("a\0b", 3);
    Utf8Encoder encoder(FromType::CP437);
    const std::string_view result = encoder.getUtf8(input);
    EXPECT_EQ(result, "a");
}
|
||||||
|
|
||||||
|
/// Only the bytes covered by the passed view may be converted, even though
/// the underlying buffer holds more ASCII characters.
TEST(Utf8EncoderTest, getUtf8ShouldLookUpUntilEndOfInputForAscii)
{
    const std::string buffer("abc");
    const std::string_view firstTwoBytes(buffer.data(), 2);
    Utf8Encoder encoder(FromType::CP437);
    const std::string_view converted = encoder.getUtf8(firstTwoBytes);
    EXPECT_EQ(converted, "ab");
}
|
||||||
|
|
||||||
|
/// Only the bytes covered by the passed view may be converted when the input
/// has a non-ASCII byte: 'a' and 0x92 are expected to become 'a' followed by
/// the bytes E2 80 99, and the trailing 'b' outside of the view is ignored.
TEST(Utf8EncoderTest, getUtf8ShouldLookUpUntilEndOfInputForNonAscii)
{
    // The literal is split so that 'b' is not parsed as a part of the hex escape.
    const std::string buffer("a\x92" "b");
    const std::string_view firstTwoBytes(buffer.data(), 2);
    Utf8Encoder encoder(FromType::WINDOWS_1252);
    const std::string_view converted = encoder.getUtf8(firstTwoBytes);
    EXPECT_EQ(converted, "a\xE2\x80\x99");
}
|
||||||
|
|
||||||
|
/// The content of the legacy encoding file converted to UTF-8 must be equal
/// to the content of the UTF-8 file of the same test case.
TEST_P(Utf8EncoderTest, getUtf8ShouldConvertFromLegacyEncodingToUtf8)
{
    const Params& params = GetParam();
    const std::string legacyText = readContent(params.mLegacyEncodingFileName);
    const std::string utf8Text = readContent(params.mUtf8FileName);
    Utf8Encoder encoder(params.mLegacyEncoding);
    const std::string_view converted = encoder.getUtf8(legacyText);
    EXPECT_EQ(converted, utf8Text);
}
|
||||||
|
|
||||||
|
/// Converting an empty input to the legacy encoding must give an empty result.
TEST(Utf8EncoderTest, getLegacyEncShouldReturnEmptyAsIs)
{
    const std::string_view emptyInput;
    Utf8Encoder encoder(FromType::CP437);
    EXPECT_EQ(encoder.getLegacyEnc(emptyInput), std::string_view());
}
|
||||||
|
|
||||||
|
/// An input made only of the bytes 1..127 must come back as a view over the
/// very same buffer: same data pointer, same size.
TEST(Utf8EncoderTest, getLegacyEncShouldReturnAsciiOnlyAsIs)
{
    // The bound is a fixed 0x7F instead of std::numeric_limits<char>::max():
    // where char is unsigned the latter is 255 and the input would no longer
    // be ASCII only.
    constexpr int lastAscii = 0x7F;
    std::string input;
    for (int c = 1; c <= lastAscii; ++c)
        input.push_back(static_cast<char>(c));
    Utf8Encoder encoder(FromType::CP437);
    const std::string_view result = encoder.getLegacyEnc(input);
    EXPECT_EQ(result.data(), input.data());
    EXPECT_EQ(result.size(), input.size());
}
|
||||||
|
|
||||||
|
/// Conversion to the legacy encoding is expected to stop at the first zero
/// byte of the input.
TEST(Utf8EncoderTest, getLegacyEncShouldLookUpUntilZero)
{
    // The length is passed explicitly: the std::string(const char*) constructor
    // stops at the embedded '\0' and would produce just "a", which makes the
    // test pass without ever exercising the zero byte handling.
    const std::string input("a\0b", 3);
    Utf8Encoder encoder(FromType::CP437);
    const std::string_view result = encoder.getLegacyEnc(input);
    EXPECT_EQ(result, "a");
}
|
||||||
|
|
||||||
|
/// Only the bytes covered by the passed view may be converted, even though
/// the underlying buffer holds more ASCII characters.
TEST(Utf8EncoderTest, getLegacyEncShouldLookUpUntilEndOfInputForAscii)
{
    const std::string buffer("abc");
    const std::string_view firstTwoBytes(buffer.data(), 2);
    Utf8Encoder encoder(FromType::CP437);
    const std::string_view converted = encoder.getLegacyEnc(firstTwoBytes);
    EXPECT_EQ(converted, "ab");
}
|
||||||
|
|
||||||
|
/// The passed view ends after two bytes (E2 80) of the three byte sequence
/// E2 80 99; the expected result holds only 'a' and the byte 0xE2 produced
/// from the complete sequence C3 A2.
TEST(Utf8EncoderTest, getLegacyEncShouldStripIncompleteCharacters)
{
    const std::string buffer("a\xc3\xa2\xe2\x80\x99");
    const std::string_view truncated(buffer.data(), 5);
    Utf8Encoder encoder(FromType::WINDOWS_1252);
    const std::string_view converted = encoder.getLegacyEnc(truncated);
    EXPECT_EQ(converted, "a\xe2");
}
|
||||||
|
|
||||||
|
/// The content of the UTF-8 file converted to the legacy encoding must be
/// equal to the content of the legacy encoding file of the same test case.
TEST_P(Utf8EncoderTest, getLegacyEncShouldConvertFromUtf8ToLegacyEncoding)
{
    const Params& params = GetParam();
    const std::string utf8Text = readContent(params.mUtf8FileName);
    const std::string legacyText = readContent(params.mLegacyEncodingFileName);
    Utf8Encoder encoder(params.mLegacyEncoding);
    const std::string_view converted = encoder.getLegacyEnc(utf8Text);
    EXPECT_EQ(converted, legacyText);
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_SUITE_P(Files, Utf8EncoderTest, Values(
|
||||||
|
Params {ToUTF8::WINDOWS_1251, "russian-win1251.txt", "russian-utf8.txt"},
|
||||||
|
Params {ToUTF8::WINDOWS_1252, "french-win1252.txt", "french-utf8.txt"}
|
||||||
|
));
|
||||||
|
}
|
@ -1 +0,0 @@
|
|||||||
*_test
|
|
@ -1,18 +0,0 @@
|
|||||||
#!/bin/bash
# Builds the test programs, then runs every *_test program: when a stored
# output exists the fresh output is diffed against it, otherwise the output
# is stored under output/ and added to git.

make || exit

mkdir -p output

for prog in *_test; do
    # An unmatched glob stays literal; skip anything that is not a regular file.
    [ -f "$prog" ] || continue

    if [ -f "output/$prog.out" ]; then
        echo "Running $prog:"
        "./$prog" | diff "output/$prog.out" -
    else
        echo "Creating $prog.out"
        "./$prog" > "output/$prog.out"
        git add "output/$prog.out"
    fi
done
|
|
@ -1,59 +0,0 @@
|
|||||||
#include <iostream>
|
|
||||||
#include <fstream>
|
|
||||||
#include <cassert>
|
|
||||||
#include <stdexcept>
|
|
||||||
|
|
||||||
#include "../to_utf8.hpp"
|
|
||||||
|
|
||||||
std::string getFirstLine(const std::string &filename);
|
|
||||||
void testEncoder(ToUTF8::FromType encoding, const std::string &legacyEncFile,
|
|
||||||
const std::string &utf8File);
|
|
||||||
|
|
||||||
/// Test character encoding conversion to and from UTF-8
|
|
||||||
void testEncoder(ToUTF8::FromType encoding, const std::string &legacyEncFile,
|
|
||||||
const std::string &utf8File)
|
|
||||||
{
|
|
||||||
// get some test data
|
|
||||||
std::string legacyEncLine = getFirstLine(legacyEncFile);
|
|
||||||
std::string utf8Line = getFirstLine(utf8File);
|
|
||||||
|
|
||||||
// create an encoder for specified character encoding
|
|
||||||
ToUTF8::Utf8Encoder encoder (encoding);
|
|
||||||
|
|
||||||
// convert text to UTF-8
|
|
||||||
std::string convertedUtf8Line = encoder.getUtf8(legacyEncLine);
|
|
||||||
|
|
||||||
std::cout << "original: " << utf8Line << std::endl;
|
|
||||||
std::cout << "converted: " << convertedUtf8Line << std::endl;
|
|
||||||
|
|
||||||
// check correctness
|
|
||||||
assert(convertedUtf8Line == utf8Line);
|
|
||||||
|
|
||||||
// convert UTF-8 text to legacy encoding
|
|
||||||
std::string convertedLegacyEncLine = encoder.getLegacyEnc(utf8Line);
|
|
||||||
// check correctness
|
|
||||||
assert(convertedLegacyEncLine == legacyEncLine);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the first line of a text file without the line terminator.
/// @param filename path of the file to read.
/// @throws std::runtime_error when the file can't be opened.
std::string getFirstLine(const std::string &filename)
{
    std::ifstream stream(filename);

    if (stream.is_open())
    {
        std::string firstLine;
        std::getline(stream, firstLine);
        // the stream is closed by its destructor
        return firstLine;
    }

    throw std::runtime_error("Unable to open file " + filename);
}
|
|
||||||
|
|
||||||
int main()
|
|
||||||
{
|
|
||||||
testEncoder(ToUTF8::WINDOWS_1251, "test_data/russian-win1251.txt", "test_data/russian-utf8.txt");
|
|
||||||
testEncoder(ToUTF8::WINDOWS_1252, "test_data/french-win1252.txt", "test_data/french-utf8.txt");
|
|
||||||
return 0;
|
|
||||||
}
|
|
Loading…
Reference in New Issue