Merge remote branch 'upstream/master'
commit
a67b49be57
@ -1,69 +0,0 @@
|
||||
# - Try to find Iconv
# Once done this will define
#
#  ICONV_FOUND - system has Iconv
#  ICONV_INCLUDE_DIR - the Iconv include directory
#  ICONV_LIBRARIES - Link these to use Iconv
#  ICONV_SECOND_ARGUMENT_IS_CONST - the second argument for iconv() is const
#
include(CheckCCompilerFlag)
# check_c_source_compiles() is used below; include the module that defines
# it explicitly instead of relying on another module to pull it in.
include(CheckCSourceCompiles)
include(CheckCXXSourceCompiles)

IF (ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)
  # Already in cache, be silent
  SET(ICONV_FIND_QUIETLY TRUE)
ENDIF (ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)

# On Windows the locations are taken from the environment first.
IF(WIN32)
  SET(ICONV_INCLUDE_DIR $ENV{ICONV_INCLUDE_DIR})
  SET(ICONV_LIBRARIES $ENV{ICONV_LIBRARIES})
ENDIF(WIN32)

FIND_PATH(ICONV_INCLUDE_DIR iconv.h)

# 'c' is in the list so that a libc with a built-in iconv is accepted too.
FIND_LIBRARY(ICONV_LIBRARIES NAMES iconv libiconv c)

IF(ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)
  SET(ICONV_FOUND TRUE)
ENDIF(ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)

set(CMAKE_REQUIRED_INCLUDES ${ICONV_INCLUDE_DIR})
set(CMAKE_REQUIRED_LIBRARIES ${ICONV_LIBRARIES})
IF(ICONV_FOUND)
  # Compile the probe with -Werror (when supported) so that passing a
  # 'const char**' where 'char**' is expected fails the check instead of
  # merely producing a warning.
  check_c_compiler_flag("-Werror" ICONV_HAVE_WERROR)
  set (CMAKE_C_FLAGS_BACKUP "${CMAKE_C_FLAGS}")
  if(ICONV_HAVE_WERROR)
    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror")
  endif(ICONV_HAVE_WERROR)
  check_c_source_compiles("
  #include <iconv.h>
  int main(){
    iconv_t conv = 0;
    const char* in = 0;
    size_t ilen = 0;
    char* out = 0;
    size_t olen = 0;
    iconv(conv, &in, &ilen, &out, &olen);
    return 0;
  }
" ICONV_SECOND_ARGUMENT_IS_CONST )
  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS_BACKUP}")
ENDIF(ICONV_FOUND)
# Do not leak the probe settings into later checks.
set(CMAKE_REQUIRED_INCLUDES)
set(CMAKE_REQUIRED_LIBRARIES)

IF(ICONV_FOUND)
  IF(NOT ICONV_FIND_QUIETLY)
    MESSAGE(STATUS "Found Iconv: ${ICONV_LIBRARIES}")
  ENDIF(NOT ICONV_FIND_QUIETLY)
ELSE(ICONV_FOUND)
  IF(Iconv_FIND_REQUIRED)
    MESSAGE(FATAL_ERROR "Could not find Iconv")
  ENDIF(Iconv_FIND_REQUIRED)
ENDIF(ICONV_FOUND)

MARK_AS_ADVANCED(
  ICONV_INCLUDE_DIR
  ICONV_LIBRARIES
  ICONV_SECOND_ARGUMENT_IS_CONST
)
|
@ -0,0 +1 @@
|
||||
gen_iconv
|
@ -0,0 +1,5 @@
|
||||
# Regenerate the translation tables by running the generator built below.
# The generator is invoked as ./gen_iconv so that it is found even when
# the current directory is not on PATH.
tables_gen.hpp: gen_iconv
	./gen_iconv > $@

# Build the table generator from its single source file.
gen_iconv: gen_iconv.cpp
	g++ -Wall $^ -o $@
|
@ -0,0 +1,86 @@
|
||||
// This program generates the file tables_gen.hpp
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
using namespace std;
|
||||
|
||||
#include <iconv.h>
|
||||
#include <assert.h>
|
||||
|
||||
// Write the indentation that starts every table row.
void tab()
{
  std::cout << " ";
}
|
||||
|
||||
// Write one byte value as " 0x<digits>", followed by a comma unless it
// is the last value of the table. The digits come out in whatever base
// std::cout is currently set to (main() switches it to hex).
void num(unsigned char i, bool last)
{
  const unsigned asNumber = i;
  std::cout << " 0x" << asNumber;

  if(last)
    return;

  std::cout << ",";
}
|
||||
|
||||
// Write one table entry (UTF8 value), 1-5 bytes
|
||||
void writeChar(char *value, int length, bool last, const std::string &comment="")
|
||||
{
|
||||
assert(length >= 1 && length <= 5);
|
||||
tab();
|
||||
num(length, false);
|
||||
for(int i=0;i<5;i++)
|
||||
num(value[i], last && i==4);
|
||||
|
||||
if(comment != "")
|
||||
cout << " // " << comment;
|
||||
|
||||
cout << endl;
|
||||
}
|
||||
|
||||
// What to write on missing characters
|
||||
void writeMissing(bool last)
|
||||
{
|
||||
// Just write a space character
|
||||
char value[5];
|
||||
value[0] = ' ';
|
||||
for(int i=1; i<5; i++)
|
||||
value[i] = 0;
|
||||
writeChar(value, 1, last, "not part of this charset");
|
||||
}
|
||||
|
||||
int write_table(const std::string &charset, const std::string &tableName)
|
||||
{
|
||||
// Write table header
|
||||
cout << "static char " << tableName << "[] =\n{\n";
|
||||
|
||||
// Open conversion system
|
||||
iconv_t cd = iconv_open ("UTF-8", charset.c_str());
|
||||
|
||||
// Convert each character from 0 to 255
|
||||
for(int i=0; i<256; i++)
|
||||
{
|
||||
bool last = (i==255);
|
||||
|
||||
char input = i;
|
||||
char *iptr = &input;
|
||||
size_t ileft = 1;
|
||||
|
||||
char output[5];
|
||||
for(int k=0; k<5; k++) output[k] = 0;
|
||||
char *optr = output;
|
||||
size_t oleft = 5;
|
||||
|
||||
size_t res = iconv(cd, &iptr, &ileft, &optr, &oleft);
|
||||
|
||||
if(res) writeMissing(last);
|
||||
else writeChar(output, 5-oleft, last);
|
||||
}
|
||||
|
||||
iconv_close (cd);
|
||||
|
||||
// Finish table
|
||||
cout << "};\n";
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
cout << hex;
|
||||
|
||||
// English
|
||||
write_table("WINDOWS-1252", "windows_1252");
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,259 @@
|
||||
static char windows_1252[] =
|
||||
{
|
||||
0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x1, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x2, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x3, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x4, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x5, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x6, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x7, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x8, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x9, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0xa, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0xb, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0xc, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0xd, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0xe, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0xf, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x10, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x11, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x12, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x13, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x14, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x15, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x16, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x17, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x18, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x19, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x1a, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x1b, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x1c, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x1d, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x1e, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x1f, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x20, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x21, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x22, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x23, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x24, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x25, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x26, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x27, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x28, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x29, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x2a, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x2b, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x2c, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x2d, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x2e, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x2f, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x30, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x31, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x32, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x33, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x34, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x35, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x36, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x37, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x38, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x39, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x3a, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x3b, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x3c, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x3d, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x3e, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x3f, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x40, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x41, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x42, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x43, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x44, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x45, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x46, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x47, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x48, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x49, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x4a, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x4b, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x4c, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x4d, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x4e, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x4f, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x50, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x51, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x52, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x53, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x54, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x55, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x56, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x57, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x58, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x59, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x5a, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x5b, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x5c, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x5d, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x5e, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x5f, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x60, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x61, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x62, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x63, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x64, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x65, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x66, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x67, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x68, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x69, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x6a, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x6b, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x6c, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x6d, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x6e, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x6f, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x70, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x71, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x72, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x73, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x74, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x75, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x76, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x77, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x78, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x79, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x7a, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x7b, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x7c, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x7d, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x7e, 0x0, 0x0, 0x0, 0x0,
|
||||
0x1, 0x7f, 0x0, 0x0, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x82, 0xac, 0x0, 0x0,
|
||||
0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
|
||||
0x3, 0xe2, 0x80, 0x9a, 0x0, 0x0,
|
||||
0x2, 0xc6, 0x92, 0x0, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0x9e, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0xa6, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0xa0, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0xa1, 0x0, 0x0,
|
||||
0x2, 0xcb, 0x86, 0x0, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0xb0, 0x0, 0x0,
|
||||
0x2, 0xc5, 0xa0, 0x0, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0xb9, 0x0, 0x0,
|
||||
0x2, 0xc5, 0x92, 0x0, 0x0, 0x0,
|
||||
0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
|
||||
0x2, 0xc5, 0xbd, 0x0, 0x0, 0x0,
|
||||
0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
|
||||
0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
|
||||
0x3, 0xe2, 0x80, 0x98, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0x99, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0x9c, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0x9d, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0xa2, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0x93, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0x94, 0x0, 0x0,
|
||||
0x2, 0xcb, 0x9c, 0x0, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x84, 0xa2, 0x0, 0x0,
|
||||
0x2, 0xc5, 0xa1, 0x0, 0x0, 0x0,
|
||||
0x3, 0xe2, 0x80, 0xba, 0x0, 0x0,
|
||||
0x2, 0xc5, 0x93, 0x0, 0x0, 0x0,
|
||||
0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
|
||||
0x2, 0xc5, 0xbe, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc5, 0xb8, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xa0, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xa1, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xa2, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xa3, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xa4, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xa5, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xa6, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xa7, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xa8, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xa9, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xaa, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xab, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xac, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xad, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xae, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xaf, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xb0, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xb1, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xb2, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xb3, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xb4, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xb5, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xb6, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xb7, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xb8, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xb9, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xba, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xbb, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xbc, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xbd, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xbe, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc2, 0xbf, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x80, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x81, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x82, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x83, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x84, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x85, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x86, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x87, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x88, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x89, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x8a, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x8b, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x8c, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x8d, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x8e, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x8f, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x90, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x91, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x92, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x93, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x94, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x95, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x96, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x97, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x98, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x99, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x9a, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x9b, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x9c, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x9d, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x9e, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0x9f, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xa0, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xa1, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xa2, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xa3, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xa4, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xa5, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xa6, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xa7, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xa8, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xa9, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xaa, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xab, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xac, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xad, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xae, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xaf, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xb0, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xb1, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xb2, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xb3, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xb4, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xb5, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xb6, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xb7, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xb8, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xb9, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xba, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xbb, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xbc, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xbd, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xbe, 0x0, 0x0, 0x0,
|
||||
0x2, 0xc3, 0xbf, 0x0, 0x0, 0x0
|
||||
};
|
@ -0,0 +1,159 @@
|
||||
#include "to_utf8.hpp"
|
||||
|
||||
#include <vector>
|
||||
#include <assert.h>
|
||||
|
||||
/* This file contains the code to translate from WINDOWS-1252 (native
|
||||
charset used in English version of Morrowind) to UTF-8. The library
|
||||
is designed to be extended to support more source encodings later,
|
||||
which means that we may add support for Russian, Polish and Chinese
|
||||
files and so on.
|
||||
|
||||
The code does not depend on any external library at
|
||||
runtime. Instead, it uses a pregenerated table made with iconv (see
|
||||
gen_iconv.cpp and the Makefile) which is located in tables_gen.hpp.
|
||||
|
||||
This is both faster and uses less dependencies. The tables would
|
||||
only need to be regenerated if we are adding support for more input
|
||||
encodings. As such, there is no need to make the generator code
|
||||
platform independent.
|
||||
|
||||
The library is optimized for the case of pure ASCII input strings,
|
||||
which is the vast majority of cases at least for the English
|
||||
version. A test of my version of Morrowind.esm got 130 non-ASCII vs
|
||||
236195 ASCII strings, or less than 0.06% of strings containing
|
||||
non-ASCII characters.
|
||||
|
||||
To optimize for this, if the first pass of the string does not find
|
||||
any non-ASCII characters, the entire string is passed along without
|
||||
any modification.
|
||||
|
||||
Most of the non-ASCII strings are books, and are quite large. (The
|
||||
non-ASCII characters are typically starting and ending quotation
|
||||
marks.) Within these, almost all the characters are ASCII. For this
|
||||
purpose, the library is also optimized for mostly-ASCII contents
|
||||
even in the cases where some conversion is necessary.
|
||||
*/
|
||||
|
||||
|
||||
// Generated tables
|
||||
#include "tables_gen.hpp"
|
||||
|
||||
// Shared global buffers, we love you.
|
||||
static std::vector<char> buf;
|
||||
static std::vector<char> output;
|
||||
static int size;
|
||||
|
||||
// Make sure the given vector is large enough for 'size' bytes,
// including a terminating zero after it, and write that terminator at
// index 'size'. A vector that is already larger than 'size' keeps its
// size and its other contents.
static void resize(std::vector<char> &buf, size_t size)
{
  if(buf.size() <= size)
    // Add some extra padding to reduce the chance of having to resize
    // again later. The +1 guarantees room for the terminator even when
    // size is 0 (3*0 would leave the vector empty).
    buf.resize(3*size + 1);

  // And make sure the string is zero terminated
  buf[size] = 0;
}
|
||||
|
||||
// This is just used to spew out a reusable input buffer for the
|
||||
// conversion process.
|
||||
char *ToUTF8::getBuffer(int s)
|
||||
{
|
||||
// Remember the requested size
|
||||
size = s;
|
||||
resize(buf, size);
|
||||
return &buf[0];
|
||||
}
|
||||
|
||||
/** Get the total length needed to decode the given zero-terminated
    string with the given translation array. The arrays are encoded
    with 6 bytes per character, with the first giving the length and
    the next 5 the actual data.

    For optimization reasons the same pass also reports whether the
    input is pure ascii (all values are <= 127): 'ascii' is set to
    true in that case and to false otherwise, so the caller can skip
    the conversion entirely.
*/
static size_t getLength(const char *arr, const char* input, bool &ascii)
{
  size_t total = 0;
  bool sawHighBit = false;

  for(const char *p = input; *p != 0; ++p)
  {
    // Index the table with the unsigned byte value
    const unsigned char c = *p;
    if(c >= 128) sawHighBit = true;
    total += arr[c*6];
  }

  ascii = !sawHighBit;
  return total;
}
|
||||
|
||||
// Write the translation of the single character 'ch' to 'out', using
// the translation array 'arr', and leave 'out' pointing just past the
// bytes written.
static void copyFromArray(const char *arr, unsigned char ch, char* &out)
{
  if(ch >= 128)
  {
    // Table entry layout: [0] = byte count, [1..count] = the bytes
    const char *entry = &arr[ch*6];
    const int count = entry[0];
    for(int k = 1; k <= count; ++k)
      *out++ = entry[k];
  }
  else
  {
    // ASCII fast path: the value is copied through unchanged
    *out++ = ch;
  }
}
|
||||
|
||||
std::string ToUTF8::getUtf8(ToUTF8::FromType from)
|
||||
{
|
||||
// Pick translation array
|
||||
const char *arr;
|
||||
if(from == ToUTF8::WINDOWS_1252)
|
||||
arr = windows_1252;
|
||||
else
|
||||
assert(0);
|
||||
|
||||
// Double check that the input string stops at some point (it might
|
||||
// contain zero terminators before this, inside its own data, which
|
||||
// is also ok.)
|
||||
const char* input = &buf[0];
|
||||
assert(input[size] == 0);
|
||||
|
||||
// TODO: The rest of this function is designed for single-character
|
||||
// input encodings only. It also assumes that the input the input
|
||||
// encoding shares its first 128 values (0-127) with ASCII. These
|
||||
// conditions must be checked again if you add more input encodings
|
||||
// later.
|
||||
|
||||
// Compute output length, and check for pure ascii input at the same
|
||||
// time.
|
||||
bool ascii;
|
||||
size_t outlen = getLength(arr, input, ascii);
|
||||
|
||||
// If we're pure ascii, then don't bother converting anything.
|
||||
if(ascii)
|
||||
return std::string(input, outlen);
|
||||
|
||||
// Make sure the output is large enough
|
||||
resize(output, outlen);
|
||||
char *out = &output[0];
|
||||
|
||||
// Translate
|
||||
while(*input)
|
||||
copyFromArray(arr, *(input++), out);
|
||||
|
||||
// Make sure that we wrote the correct number of bytes
|
||||
assert((out-&output[0]) == (int)outlen);
|
||||
|
||||
// And make extra sure the output is null terminated
|
||||
assert(output.size() > outlen);
|
||||
assert(output[outlen] == 0);
|
||||
|
||||
// Return a string
|
||||
return std::string(&output[0], outlen);
|
||||
}
|
||||
|
@ -0,0 +1,24 @@
|
||||
#ifndef COMPONENTS_TOUTF8_H
|
||||
#define COMPONENTS_TOUTF8_H
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace ToUTF8
{
  /// All the code pages that are currently supported as input.
  enum FromType
    {
      /// Used by English version of Morrowind (and probably others)
      WINDOWS_1252
    };

  /// Return a writable buffer of at least 'size' bytes. The buffer
  /// does not have to be freed.
  char* getBuffer(int size);

  /// Convert the previously written buffer to UTF8 from the given
  /// code page.
  std::string getUtf8(FromType from);
}
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue