forked from teamnwah/openmw-tes3coop
Merge remote branch 'upstream/master'
This commit is contained in:
commit
a67b49be57
11 changed files with 551 additions and 181 deletions
|
@ -44,6 +44,12 @@ set(NIFOGRE_HEADER
|
||||||
${COMP_DIR}/nifogre/ogre_nif_loader.hpp)
|
${COMP_DIR}/nifogre/ogre_nif_loader.hpp)
|
||||||
source_group(components\\nifogre FILES ${NIFOGRE} ${NIFOGRE_HEADER})
|
source_group(components\\nifogre FILES ${NIFOGRE} ${NIFOGRE_HEADER})
|
||||||
|
|
||||||
|
set(TO_UTF8
|
||||||
|
${COMP_DIR}/to_utf8/to_utf8.cpp)
|
||||||
|
set(TO_UTF8_HEADER
|
||||||
|
${COMP_DIR}/to_utf8/to_utf8.hpp)
|
||||||
|
source_group(components\\to_utf8 FILES ${TO_UTF8} ${TO_UTF8_HEADER})
|
||||||
|
|
||||||
set(ESM_STORE
|
set(ESM_STORE
|
||||||
${COMP_DIR}/esm_store/store.cpp)
|
${COMP_DIR}/esm_store/store.cpp)
|
||||||
set(ESM_STORE_HEADER
|
set(ESM_STORE_HEADER
|
||||||
|
@ -75,10 +81,10 @@ file(GLOB INTERPRETER ${COMP_DIR}/interpreter/*.cpp)
|
||||||
file(GLOB INTERPRETER_HEADER ${COMP_DIR}/interpreter/*.hpp)
|
file(GLOB INTERPRETER_HEADER ${COMP_DIR}/interpreter/*.hpp)
|
||||||
source_group(components\\interpreter FILES ${INTERPRETER} ${INTERPRETER_HEADER})
|
source_group(components\\interpreter FILES ${INTERPRETER} ${INTERPRETER_HEADER})
|
||||||
|
|
||||||
set(COMPONENTS ${BSA} ${NIF} ${NIFOGRE} ${ESM_STORE} ${MISC}
|
set(COMPONENTS ${BSA} ${NIF} ${NIFOGRE} ${ESM_STORE} ${MISC} ${TO_UTF8}
|
||||||
${COMPILER} ${INTERPRETER} ${ESM})
|
${COMPILER} ${INTERPRETER} ${ESM})
|
||||||
set(COMPONENTS_HEADER ${BSA_HEADER} ${NIF_HEADER} ${NIFOGRE_HEADER} ${ESM_STORE_HEADER}
|
set(COMPONENTS_HEADER ${BSA_HEADER} ${NIF_HEADER} ${NIFOGRE_HEADER} ${ESM_STORE_HEADER}
|
||||||
${ESM_HEADER} ${MISC_HEADER} ${COMPILER_HEADER}
|
${ESM_HEADER} ${MISC_HEADER} ${COMPILER_HEADER} ${TO_UTF8_HEADER}
|
||||||
${INTERPRETER_HEADER})
|
${INTERPRETER_HEADER})
|
||||||
|
|
||||||
# source directory: libs
|
# source directory: libs
|
||||||
|
@ -158,12 +164,10 @@ endif (WIN32)
|
||||||
find_package(OGRE REQUIRED)
|
find_package(OGRE REQUIRED)
|
||||||
find_package(Boost REQUIRED COMPONENTS system filesystem program_options thread)
|
find_package(Boost REQUIRED COMPONENTS system filesystem program_options thread)
|
||||||
find_package(OIS REQUIRED)
|
find_package(OIS REQUIRED)
|
||||||
find_package(Iconv REQUIRED)
|
|
||||||
find_package(OpenAL REQUIRED)
|
find_package(OpenAL REQUIRED)
|
||||||
include_directories("."
|
include_directories("."
|
||||||
${OGRE_INCLUDE_DIR} ${OGRE_INCLUDE_DIR}/Ogre
|
${OGRE_INCLUDE_DIR} ${OGRE_INCLUDE_DIR}/Ogre
|
||||||
${OIS_INCLUDE_DIR} ${Boost_INCLUDE_DIR}
|
${OIS_INCLUDE_DIR} ${Boost_INCLUDE_DIR}
|
||||||
${ICONV_INCLUDE_DIR}
|
|
||||||
${PLATFORM_INCLUDE_DIR}
|
${PLATFORM_INCLUDE_DIR}
|
||||||
${CMAKE_HOME_DIRECTORY}/extern/caelum/include
|
${CMAKE_HOME_DIRECTORY}/extern/caelum/include
|
||||||
${CMAKE_HOME_DIRECTORY}/extern/mygui_3.0.1/MyGUIEngine/include
|
${CMAKE_HOME_DIRECTORY}/extern/mygui_3.0.1/MyGUIEngine/include
|
||||||
|
|
|
@ -9,6 +9,7 @@ source_group(apps\\esmtool FILES ${ESMTOOL})
|
||||||
add_executable(esmtool
|
add_executable(esmtool
|
||||||
${ESMTOOL}
|
${ESMTOOL}
|
||||||
${MISC} ${MISC_HEADER}
|
${MISC} ${MISC_HEADER}
|
||||||
|
${TO_UTF8}
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(esmtool
|
target_link_libraries(esmtool
|
||||||
|
|
|
@ -199,7 +199,6 @@ target_link_libraries(openmw
|
||||||
${Boost_LIBRARIES}
|
${Boost_LIBRARIES}
|
||||||
${OPENAL_LIBRARY}
|
${OPENAL_LIBRARY}
|
||||||
${SOUND_INPUT_LIBRARY}
|
${SOUND_INPUT_LIBRARY}
|
||||||
${ICONV_LIBRARIES}
|
|
||||||
caelum
|
caelum
|
||||||
MyGUIEngine
|
MyGUIEngine
|
||||||
MyGUI.OgrePlatform
|
MyGUI.OgrePlatform
|
||||||
|
|
|
@ -1,69 +0,0 @@
|
||||||
# - Try to find Iconv
|
|
||||||
# Once done this will define
|
|
||||||
#
|
|
||||||
# ICONV_FOUND - system has Iconv
|
|
||||||
# ICONV_INCLUDE_DIR - the Iconv include directory
|
|
||||||
# ICONV_LIBRARIES - Link these to use Iconv
|
|
||||||
# ICONV_SECOND_ARGUMENT_IS_CONST - the second argument for iconv() is const
|
|
||||||
#
|
|
||||||
include(CheckCCompilerFlag)
|
|
||||||
include(CheckCXXSourceCompiles)
|
|
||||||
|
|
||||||
IF (ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)
|
|
||||||
# Already in cache, be silent
|
|
||||||
SET(ICONV_FIND_QUIETLY TRUE)
|
|
||||||
ENDIF (ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)
|
|
||||||
|
|
||||||
IF(WIN32)
|
|
||||||
SET(ICONV_INCLUDE_DIR $ENV{ICONV_INCLUDE_DIR})
|
|
||||||
SET(ICONV_LIBRARIES $ENV{ICONV_LIBRARIES})
|
|
||||||
ENDIF(WIN32)
|
|
||||||
|
|
||||||
FIND_PATH(ICONV_INCLUDE_DIR iconv.h)
|
|
||||||
|
|
||||||
FIND_LIBRARY(ICONV_LIBRARIES NAMES iconv libiconv c)
|
|
||||||
|
|
||||||
IF(ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)
|
|
||||||
SET(ICONV_FOUND TRUE)
|
|
||||||
ENDIF(ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)
|
|
||||||
|
|
||||||
set(CMAKE_REQUIRED_INCLUDES ${ICONV_INCLUDE_DIR})
|
|
||||||
set(CMAKE_REQUIRED_LIBRARIES ${ICONV_LIBRARIES})
|
|
||||||
IF(ICONV_FOUND)
|
|
||||||
check_c_compiler_flag("-Werror" ICONV_HAVE_WERROR)
|
|
||||||
set (CMAKE_C_FLAGS_BACKUP "${CMAKE_C_FLAGS}")
|
|
||||||
if(ICONV_HAVE_WERROR)
|
|
||||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror")
|
|
||||||
endif(ICONV_HAVE_WERROR)
|
|
||||||
check_c_source_compiles("
|
|
||||||
#include <iconv.h>
|
|
||||||
int main(){
|
|
||||||
iconv_t conv = 0;
|
|
||||||
const char* in = 0;
|
|
||||||
size_t ilen = 0;
|
|
||||||
char* out = 0;
|
|
||||||
size_t olen = 0;
|
|
||||||
iconv(conv, &in, &ilen, &out, &olen);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
" ICONV_SECOND_ARGUMENT_IS_CONST )
|
|
||||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS_BACKUP}")
|
|
||||||
ENDIF(ICONV_FOUND)
|
|
||||||
set(CMAKE_REQUIRED_INCLUDES)
|
|
||||||
set(CMAKE_REQUIRED_LIBRARIES)
|
|
||||||
|
|
||||||
IF(ICONV_FOUND)
|
|
||||||
IF(NOT ICONV_FIND_QUIETLY)
|
|
||||||
MESSAGE(STATUS "Found Iconv: ${ICONV_LIBRARIES}")
|
|
||||||
ENDIF(NOT ICONV_FIND_QUIETLY)
|
|
||||||
ELSE(ICONV_FOUND)
|
|
||||||
IF(Iconv_FIND_REQUIRED)
|
|
||||||
MESSAGE(FATAL_ERROR "Could not find Iconv")
|
|
||||||
ENDIF(Iconv_FIND_REQUIRED)
|
|
||||||
ENDIF(ICONV_FOUND)
|
|
||||||
|
|
||||||
MARK_AS_ADVANCED(
|
|
||||||
ICONV_INCLUDE_DIR
|
|
||||||
ICONV_LIBRARIES
|
|
||||||
ICONV_SECOND_ARGUMENT_IS_CONST
|
|
||||||
)
|
|
|
@ -3,22 +3,18 @@
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <libs/platform/stdint.h>
|
#include <libs/platform/stdint.h>
|
||||||
#include <string.h>
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <iomanip>
|
#include <string.h>
|
||||||
#include <errno.h>
|
|
||||||
|
|
||||||
#ifndef __WIN32__
|
|
||||||
#include <iconv.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <libs/mangle/stream/stream.hpp>
|
#include <libs/mangle/stream/stream.hpp>
|
||||||
#include <libs/mangle/stream/servers/file_stream.hpp>
|
#include <libs/mangle/stream/servers/file_stream.hpp>
|
||||||
#include <libs/mangle/tools/str_exception.hpp>
|
#include <libs/mangle/tools/str_exception.hpp>
|
||||||
#include <components/misc/stringops.hpp>
|
#include <components/misc/stringops.hpp>
|
||||||
|
|
||||||
|
#include <components/to_utf8/to_utf8.hpp>
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
// need our own implementation of strnlen
|
// need our own implementation of strnlen
|
||||||
static size_t strnlen(const char *s, size_t n)
|
static size_t strnlen(const char *s, size_t n)
|
||||||
|
@ -603,112 +599,17 @@ public:
|
||||||
void getName(NAME &name) { getT(name); }
|
void getName(NAME &name) { getT(name); }
|
||||||
void getUint(uint32_t &u) { getT(u); }
|
void getUint(uint32_t &u) { getT(u); }
|
||||||
|
|
||||||
// Read the next size bytes and return them as a string
|
// Read the next 'size' bytes and return them as a string. Converts
|
||||||
|
// them from native encoding to UTF8 in the process.
|
||||||
std::string getString(int size)
|
std::string getString(int size)
|
||||||
{
|
{
|
||||||
// Not very optimized, but we can fix that later
|
char *ptr = ToUTF8::getBuffer(size);
|
||||||
char *ptr = new char[size];
|
|
||||||
esm->read(ptr,size);
|
esm->read(ptr,size);
|
||||||
|
|
||||||
// Remove any zero terminators
|
// Convert to UTF8 and return
|
||||||
for(int i=0; i<size; i++)
|
return ToUTF8::getUtf8(ToUTF8::WINDOWS_1252);
|
||||||
if(ptr[i] == 0)
|
|
||||||
size = i;
|
|
||||||
|
|
||||||
// Convert to std::string and return
|
|
||||||
std::string res(ptr,size);
|
|
||||||
delete[] ptr;
|
|
||||||
return convertToUTF8(res);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert a string from the encoding used by Morrowind to UTF-8
|
|
||||||
std::string convertToUTF8 (std::string input)
|
|
||||||
{
|
|
||||||
#ifdef __WIN32__
|
|
||||||
return input;
|
|
||||||
#else
|
|
||||||
std::string output = "";
|
|
||||||
|
|
||||||
//create convert description
|
|
||||||
iconv_t cd = iconv_open ("UTF-8", "WINDOWS-1252");
|
|
||||||
|
|
||||||
if (cd == (iconv_t)-1) //error handling
|
|
||||||
{
|
|
||||||
std::string errMsg = "Creating description for UTF-8 converting failed: ";
|
|
||||||
|
|
||||||
switch (errno) //detailed error messages (maybe it contains too much detail :)
|
|
||||||
{
|
|
||||||
case EMFILE:
|
|
||||||
errMsg += "{OPEN_MAX} files descriptors are currently open in the calling process.";
|
|
||||||
case ENFILE:
|
|
||||||
errMsg += "Too many files are currently open in the system.";
|
|
||||||
case ENOMEM:
|
|
||||||
errMsg +="Insufficient storage space is available.";
|
|
||||||
case EINVAL:
|
|
||||||
errMsg += "The conversion specified by fromcode and tocode is not supported by the implementation.";
|
|
||||||
|
|
||||||
default:
|
|
||||||
errMsg += "Unknown Error\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
fail (errMsg);
|
|
||||||
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const size_t inputSize = input.size();
|
|
||||||
|
|
||||||
if (inputSize) //input is not empty
|
|
||||||
{
|
|
||||||
//convert function doesn't accept const char *, therefore copy content into an char *
|
|
||||||
std::vector<char> inputBuffer (input.begin(), input.end());
|
|
||||||
char *inputBufferBegin = &inputBuffer[0];
|
|
||||||
|
|
||||||
size_t inputBytesLeft = inputSize; //bytes to convert
|
|
||||||
|
|
||||||
static const size_t outputSize = 1000;
|
|
||||||
size_t outputBytesLeft;
|
|
||||||
|
|
||||||
char outputBuffer[outputSize];
|
|
||||||
char *outputBufferBegin;
|
|
||||||
|
|
||||||
while (inputBytesLeft > 0)
|
|
||||||
{
|
|
||||||
outputBytesLeft = outputSize;
|
|
||||||
outputBufferBegin = outputBuffer;
|
|
||||||
|
|
||||||
if (iconv (cd, &inputBufferBegin, &inputBytesLeft, &outputBufferBegin, &outputBytesLeft) == (size_t)-1)
|
|
||||||
{
|
|
||||||
switch (errno)
|
|
||||||
{
|
|
||||||
case E2BIG: //outputBuffer is full
|
|
||||||
output += std::string (outputBuffer, outputSize);
|
|
||||||
break;
|
|
||||||
case EILSEQ:
|
|
||||||
fail ("Iconv: Invalid multibyte sequence.\n");
|
|
||||||
break;
|
|
||||||
case EINVAL:
|
|
||||||
fail ("Iconv: Incomplete multibyte sequence.\n");
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
fail ("Iconv: Unknown Error\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//read only relevant bytes from outputBuffer
|
|
||||||
output += std::string (outputBuffer, outputSize - outputBytesLeft);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
iconv_close (cd);
|
|
||||||
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void skip(int bytes) { esm->seek(esm->tell()+bytes); }
|
void skip(int bytes) { esm->seek(esm->tell()+bytes); }
|
||||||
uint64_t getOffset() { return esm->tell(); }
|
uint64_t getOffset() { return esm->tell(); }
|
||||||
|
|
||||||
|
|
1
components/to_utf8/.gitignore
vendored
Normal file
1
components/to_utf8/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
gen_iconv
|
5
components/to_utf8/Makefile
Normal file
5
components/to_utf8/Makefile
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
tables_gen.hpp: gen_iconv
|
||||||
|
gen_iconv > tables_gen.hpp
|
||||||
|
|
||||||
|
gen_iconv: gen_iconv.cpp
|
||||||
|
g++ -Wall $^ -o $@
|
86
components/to_utf8/gen_iconv.cpp
Normal file
86
components/to_utf8/gen_iconv.cpp
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
// This program generates the file tables_gen.hpp
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#include <iconv.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
// Emit the indentation that starts every row of the generated table.
void tab()
{
  std::cout << " ";
}
|
||||||
|
|
||||||
|
// Print one table byte as " 0x<value>", followed by a comma unless it
// is the last byte of the table. The number base is whatever the
// stream is currently set to; main() switches std::cout to hex before
// any table is written.
void num(unsigned char i, bool last)
{
  const unsigned value = i;
  std::cout << " 0x" << value << (last ? "" : ",");
}
|
||||||
|
|
||||||
|
// Write one table entry (UTF8 value), 1-5 bytes
|
||||||
|
void writeChar(char *value, int length, bool last, const std::string &comment="")
|
||||||
|
{
|
||||||
|
assert(length >= 1 && length <= 5);
|
||||||
|
tab();
|
||||||
|
num(length, false);
|
||||||
|
for(int i=0;i<5;i++)
|
||||||
|
num(value[i], last && i==4);
|
||||||
|
|
||||||
|
if(comment != "")
|
||||||
|
cout << " // " << comment;
|
||||||
|
|
||||||
|
cout << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// What to write on missing characters
|
||||||
|
void writeMissing(bool last)
|
||||||
|
{
|
||||||
|
// Just write a space character
|
||||||
|
char value[5];
|
||||||
|
value[0] = ' ';
|
||||||
|
for(int i=1; i<5; i++)
|
||||||
|
value[i] = 0;
|
||||||
|
writeChar(value, 1, last, "not part of this charset");
|
||||||
|
}
|
||||||
|
|
||||||
|
int write_table(const std::string &charset, const std::string &tableName)
|
||||||
|
{
|
||||||
|
// Write table header
|
||||||
|
cout << "static char " << tableName << "[] =\n{\n";
|
||||||
|
|
||||||
|
// Open conversion system
|
||||||
|
iconv_t cd = iconv_open ("UTF-8", charset.c_str());
|
||||||
|
|
||||||
|
// Convert each character from 0 to 255
|
||||||
|
for(int i=0; i<256; i++)
|
||||||
|
{
|
||||||
|
bool last = (i==255);
|
||||||
|
|
||||||
|
char input = i;
|
||||||
|
char *iptr = &input;
|
||||||
|
size_t ileft = 1;
|
||||||
|
|
||||||
|
char output[5];
|
||||||
|
for(int k=0; k<5; k++) output[k] = 0;
|
||||||
|
char *optr = output;
|
||||||
|
size_t oleft = 5;
|
||||||
|
|
||||||
|
size_t res = iconv(cd, &iptr, &ileft, &optr, &oleft);
|
||||||
|
|
||||||
|
if(res) writeMissing(last);
|
||||||
|
else writeChar(output, 5-oleft, last);
|
||||||
|
}
|
||||||
|
|
||||||
|
iconv_close (cd);
|
||||||
|
|
||||||
|
// Finish table
|
||||||
|
cout << "};\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
cout << hex;
|
||||||
|
|
||||||
|
// English
|
||||||
|
write_table("WINDOWS-1252", "windows_1252");
|
||||||
|
return 0;
|
||||||
|
}
|
259
components/to_utf8/tables_gen.hpp
Normal file
259
components/to_utf8/tables_gen.hpp
Normal file
|
@ -0,0 +1,259 @@
|
||||||
|
static char windows_1252[] =
|
||||||
|
{
|
||||||
|
0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x1, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x2, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x3, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x4, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x5, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x6, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x7, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x8, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x9, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0xa, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0xb, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0xc, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0xd, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0xe, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0xf, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x10, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x11, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x12, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x13, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x14, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x15, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x16, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x17, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x18, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x19, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x1a, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x1b, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x1c, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x1d, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x1e, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x1f, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x20, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x21, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x22, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x23, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x24, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x25, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x26, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x27, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x28, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x29, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x2a, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x2b, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x2c, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x2d, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x2e, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x2f, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x30, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x31, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x32, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x33, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x34, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x35, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x36, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x37, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x38, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x39, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x3a, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x3b, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x3c, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x3d, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x3e, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x3f, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x40, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x41, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x42, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x43, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x44, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x45, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x46, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x47, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x48, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x49, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x4a, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x4b, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x4c, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x4d, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x4e, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x4f, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x50, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x51, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x52, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x53, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x54, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x55, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x56, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x57, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x58, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x59, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x5a, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x5b, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x5c, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x5d, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x5e, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x5f, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x60, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x61, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x62, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x63, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x64, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x65, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x66, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x67, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x68, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x69, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x6a, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x6b, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x6c, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x6d, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x6e, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x6f, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x70, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x71, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x72, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x73, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x74, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x75, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x76, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x77, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x78, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x79, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x7a, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x7b, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x7c, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x7d, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x7e, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x7f, 0x0, 0x0, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x82, 0xac, 0x0, 0x0,
|
||||||
|
0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
|
||||||
|
0x3, 0xe2, 0x80, 0x9a, 0x0, 0x0,
|
||||||
|
0x2, 0xc6, 0x92, 0x0, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0x9e, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0xa6, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0xa0, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0xa1, 0x0, 0x0,
|
||||||
|
0x2, 0xcb, 0x86, 0x0, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0xb0, 0x0, 0x0,
|
||||||
|
0x2, 0xc5, 0xa0, 0x0, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0xb9, 0x0, 0x0,
|
||||||
|
0x2, 0xc5, 0x92, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
|
||||||
|
0x2, 0xc5, 0xbd, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
|
||||||
|
0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
|
||||||
|
0x3, 0xe2, 0x80, 0x98, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0x99, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0x9c, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0x9d, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0xa2, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0x93, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0x94, 0x0, 0x0,
|
||||||
|
0x2, 0xcb, 0x9c, 0x0, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x84, 0xa2, 0x0, 0x0,
|
||||||
|
0x2, 0xc5, 0xa1, 0x0, 0x0, 0x0,
|
||||||
|
0x3, 0xe2, 0x80, 0xba, 0x0, 0x0,
|
||||||
|
0x2, 0xc5, 0x93, 0x0, 0x0, 0x0,
|
||||||
|
0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
|
||||||
|
0x2, 0xc5, 0xbe, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc5, 0xb8, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xa0, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xa1, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xa2, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xa3, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xa4, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xa5, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xa6, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xa7, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xa8, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xa9, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xaa, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xab, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xac, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xad, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xae, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xaf, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xb0, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xb1, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xb2, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xb3, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xb4, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xb5, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xb6, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xb7, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xb8, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xb9, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xba, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xbb, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xbc, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xbd, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xbe, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc2, 0xbf, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x80, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x81, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x82, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x83, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x84, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x85, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x86, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x87, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x88, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x89, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x8a, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x8b, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x8c, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x8d, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x8e, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x8f, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x90, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x91, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x92, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x93, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x94, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x95, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x96, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x97, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x98, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x99, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x9a, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x9b, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x9c, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x9d, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x9e, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0x9f, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xa0, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xa1, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xa2, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xa3, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xa4, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xa5, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xa6, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xa7, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xa8, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xa9, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xaa, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xab, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xac, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xad, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xae, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xaf, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xb0, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xb1, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xb2, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xb3, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xb4, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xb5, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xb6, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xb7, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xb8, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xb9, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xba, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xbb, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xbc, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xbd, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xbe, 0x0, 0x0, 0x0,
|
||||||
|
0x2, 0xc3, 0xbf, 0x0, 0x0, 0x0
|
||||||
|
};
|
159
components/to_utf8/to_utf8.cpp
Normal file
159
components/to_utf8/to_utf8.cpp
Normal file
|
@ -0,0 +1,159 @@
|
||||||
|
#include "to_utf8.hpp"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
/* This file contains the code to translate from WINDOWS-1252 (native
|
||||||
|
charset used in English version of Morrowind) to UTF-8. The library
|
||||||
|
is designed to be extended to support more source encodings later,
|
||||||
|
which means that we may add support for Russian, Polish and Chinese
|
||||||
|
files and so on.
|
||||||
|
|
||||||
|
The code does not depend on any external library at
|
||||||
|
runtime. Instead, it uses a pregenerated table made with iconv (see
|
||||||
|
gen_iconv.cpp and the Makefile) which is located in tables_gen.hpp.
|
||||||
|
|
||||||
|
This is both faster and uses less dependencies. The tables would
|
||||||
|
only need to be regenerated if we are adding support for more input
|
||||||
|
encodings. As such, there is no need to make the generator code
|
||||||
|
platform independent.
|
||||||
|
|
||||||
|
The library is optimized for the case of pure ASCII input strings,
|
||||||
|
which is the vast majority of cases at least for the English
|
||||||
|
version. A test of my version of Morrowind.esm got 130 non-ASCII vs
|
||||||
|
236195 ASCII strings, or less than 0.06% of strings containing
|
||||||
|
non-ASCII characters.
|
||||||
|
|
||||||
|
To optimize for this, if the first pass of the string does not find
|
||||||
|
any non-ASCII characters, the entire string is passed along without
|
||||||
|
any modification.
|
||||||
|
|
||||||
|
Most of the non-ASCII strings are books, and are quite large. (The
|
||||||
|
non-ASCII characters are typically starting and ending quotation
|
||||||
|
marks.) Within these, almost all the characters are ASCII. For this
|
||||||
|
purpose, the library is also optimized for mostly-ASCII contents
|
||||||
|
even in the cases where some conversion is necessary.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
// Generated tables
|
||||||
|
#include "tables_gen.hpp"
|
||||||
|
|
||||||
|
// Shared global buffers, we love you.
// buf:    input bytes; sized and handed out by getBuffer(), read by
//         getUtf8().
// output: scratch space that getUtf8() writes translated bytes into.
// size:   the byte count passed to the most recent getBuffer() call;
//         getUtf8() checks that buf[size] is a zero terminator.
|
||||||
|
static std::vector<char> buf;
|
||||||
|
static std::vector<char> output;
|
||||||
|
static int size;
|
||||||
|
|
||||||
|
// Make sure the given vector is large enough for 'size' bytes,
// including a terminating zero after it. The vector is only ever
// grown, never shrunk, and buf[size] is always set to zero.
static void resize(std::vector<char> &buf, size_t size)
{
  if(buf.size() <= size)
    // Add some extra padding to reduce the chance of having to resize
    // again later. The +1 guarantees room for the terminator even when
    // size is 0 (3*0 would leave the vector empty and make the write
    // below go out of bounds).
    buf.resize(3*size + 1);

  // And make sure the string is zero terminated
  buf[size] = 0;
}
|
||||||
|
|
||||||
|
// This is just used to spew out a reusable input buffer for the
|
||||||
|
// conversion process.
|
||||||
|
char *ToUTF8::getBuffer(int s)
|
||||||
|
{
|
||||||
|
// Remember the requested size
|
||||||
|
size = s;
|
||||||
|
resize(buf, size);
|
||||||
|
return &buf[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Get the total length needed to decode the given zero terminated
    string with the given translation array. The arrays are encoded
    with 6 bytes per character, with the first giving the length and
    the next 5 the actual data.

    The function serves a dual purpose for optimization reasons: it
    also reports whether the input is pure ascii (all values are <=
    127) through the 'ascii' parameter, so the caller can skip the
    translation entirely in that case.
*/
static size_t getLength(const char *arr, const char* input, bool &ascii)
{
  bool sawHighByte = false;
  size_t total = 0;

  for(const unsigned char *p = reinterpret_cast<const unsigned char*>(input);
      *p != 0; ++p)
    {
      if(*p >= 128) sawHighByte = true;

      // First byte of each 6-byte table entry is the encoded length
      total += arr[*p * 6];
    }

  ascii = !sawHighByte;
  return total;
}
|
||||||
|
|
||||||
|
// Translate one character 'ch' using the translation array 'arr',
// writing the result at 'out' and advancing 'out' past the bytes
// written.
static void copyFromArray(const char *arr, unsigned char ch, char* &out)
{
  if(ch >= 128)
    {
      // Table entry layout: [length][up to 5 data bytes]
      const char *entry = arr + ch*6;
      const int count = entry[0];
      for(int k=1; k<=count; k++)
        *(out++) = entry[k];
      return;
    }

  // ASCII values are passed through without a table lookup
  *(out++) = ch;
}
|
||||||
|
|
||||||
|
std::string ToUTF8::getUtf8(ToUTF8::FromType from)
|
||||||
|
{
|
||||||
|
// Pick translation array
|
||||||
|
const char *arr;
|
||||||
|
if(from == ToUTF8::WINDOWS_1252)
|
||||||
|
arr = windows_1252;
|
||||||
|
else
|
||||||
|
assert(0);
|
||||||
|
|
||||||
|
// Double check that the input string stops at some point (it might
|
||||||
|
// contain zero terminators before this, inside its own data, which
|
||||||
|
// is also ok.)
|
||||||
|
const char* input = &buf[0];
|
||||||
|
assert(input[size] == 0);
|
||||||
|
|
||||||
|
// TODO: The rest of this function is designed for single-character
|
||||||
|
// input encodings only. It also assumes that the input the input
|
||||||
|
// encoding shares its first 128 values (0-127) with ASCII. These
|
||||||
|
// conditions must be checked again if you add more input encodings
|
||||||
|
// later.
|
||||||
|
|
||||||
|
// Compute output length, and check for pure ascii input at the same
|
||||||
|
// time.
|
||||||
|
bool ascii;
|
||||||
|
size_t outlen = getLength(arr, input, ascii);
|
||||||
|
|
||||||
|
// If we're pure ascii, then don't bother converting anything.
|
||||||
|
if(ascii)
|
||||||
|
return std::string(input, outlen);
|
||||||
|
|
||||||
|
// Make sure the output is large enough
|
||||||
|
resize(output, outlen);
|
||||||
|
char *out = &output[0];
|
||||||
|
|
||||||
|
// Translate
|
||||||
|
while(*input)
|
||||||
|
copyFromArray(arr, *(input++), out);
|
||||||
|
|
||||||
|
// Make sure that we wrote the correct number of bytes
|
||||||
|
assert((out-&output[0]) == (int)outlen);
|
||||||
|
|
||||||
|
// And make extra sure the output is null terminated
|
||||||
|
assert(output.size() > outlen);
|
||||||
|
assert(output[outlen] == 0);
|
||||||
|
|
||||||
|
// Return a string
|
||||||
|
return std::string(&output[0], outlen);
|
||||||
|
}
|
||||||
|
|
24
components/to_utf8/to_utf8.hpp
Normal file
24
components/to_utf8/to_utf8.hpp
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
#ifndef COMPONENTS_TOUTF8_H
|
||||||
|
#define COMPONENTS_TOUTF8_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace ToUTF8
{
  // Source code pages that can currently be converted to UTF8.
  enum FromType
    {
      // Used by English version of Morrowind (and probably others)
      WINDOWS_1252
    };

  // Step 1: get a writable buffer of at least 'size' bytes and fill it
  // with the text to convert. The buffer is owned by the library and
  // does not have to be freed.
  char* getBuffer(int size);

  // Step 2: convert what was written into that buffer from the given
  // code page to UTF8.
  std::string getUtf8(FromType from);
}
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in a new issue