From 0bdf52a0719a756f3be97a988ff33784d02f7fcc Mon Sep 17 00:00:00 2001
From: Emanuel Guevel <guevel.emanuel@gmail.com>
Date: Thu, 3 Jan 2013 23:21:14 +0100
Subject: [PATCH] components/to_utf8: keep only Utf8Encoder

---
 components/to_utf8/to_utf8.cpp | 314 ---------------------------------
 components/to_utf8/to_utf8.hpp |   9 -
 2 files changed, 323 deletions(-)
diff --git a/components/to_utf8/to_utf8.cpp b/components/to_utf8/to_utf8.cpp
index 8ac582b81..5efec36a4 100644
--- a/components/to_utf8/to_utf8.cpp
+++ b/components/to_utf8/to_utf8.cpp
@@ -41,13 +41,6 @@
 // Generated tables
 #include "tables_gen.hpp"
 
-// Shared global buffers, we love you. These initial sizes are large
-// enough to hold the largest books in Morrowind.esm, but we will
-// resize automaticall if necessary.
-static std::vector<char> buf    (50*1024);
-static std::vector<char> output (50*1024);
-static int size;
-
 using namespace ToUTF8;
 
 Utf8Encoder::Utf8Encoder(void):
@@ -330,313 +323,6 @@ void Utf8Encoder::copyFromArray2(const char*& chp, char* &out)
     *(out++) = ch; // Could not find glyph, just put whatever
 }
 
-static void resize(std::vector<char> &buf, size_t size)
-{
-    if(buf.size() <= size)
-        // Add some extra padding to reduce the chance of having to resize
-        // again later.
-        buf.resize(3*size);
-
-    // And make sure the string is zero terminated
-    buf[size] = 0;
-}
-
-// This is just used to spew out a reusable input buffer for the
-// conversion process.
-char *ToUTF8::getBuffer(int s)
-{
-    // Remember the requested size
-    size = s;
-    resize(buf, size);
-    return &buf[0];
-}
-
-/** Get the total length length needed to decode the given string with
-  the given translation array. The arrays are encoded with 6 bytes
-  per character, with the first giving the length and the next 5 the
-  actual data.
-
-  The function serves a dual purpose for optimization reasons: it
-  checks if the input is pure ascii (all values are <= 127). If this
-  is the case, then the ascii parameter is set to true, and the
-  caller can optimize for this case.
- */
-static size_t getLength(const char *arr, const char* input, bool &ascii)
-{
-    ascii = true;
-    size_t len = 0;
-    const char* ptr = input;
-    unsigned char inp = *ptr;
-
-    // Do away with the ascii part of the string first (this is almost
-    // always the entire string.)
-    while(inp && inp < 128)
-        inp = *(++ptr);
-    len += (ptr-input);
-
-    // If we're not at the null terminator at this point, then there
-    // were some non-ascii characters to deal with. Go to slow-mode for
-    // the rest of the string.
-    if(inp)
-    {
-        ascii = false;
-        while(inp)
-        {
-            // Find the translated length of this character in the
-            // lookup table.
-            len += arr[inp*6];
-            inp = *(++ptr);
-        }
-    }
-    return len;
-}
-
-// Translate one character 'ch' using the translation array 'arr', and
-// advance the output pointer accordingly.
-static void copyFromArray(const char *arr, unsigned char ch, char* &out)
-{
-    // Optimize for ASCII values
-    if(ch < 128)
-    {
-        *(out++) = ch;
-        return;
-    }
-
-    const char *in = arr + ch*6;
-    int len = *(in++);
-    for(int i=0; i<len; i++)
-        *(out++) = *(in++);
-}
-
-std::string ToUTF8::getUtf8(ToUTF8::FromType from)
-{
-    // Pick translation array
-    const char *arr;
-    switch (from)
-    {
-        case ToUTF8::WINDOWS_1252:
-        {
-            arr = ToUTF8::windows_1252;
-            break;
-        }
-        case ToUTF8::WINDOWS_1250:
-        {
-            arr = ToUTF8::windows_1250;
-            break;
-        }
-        case ToUTF8::WINDOWS_1251:
-        {
-            arr = ToUTF8::windows_1251;
-            break;
-        }
-        default:
-        {
-            assert(0);
-        }
-    }
-
-    // Double check that the input string stops at some point (it might
-    // contain zero terminators before this, inside its own data, which
-    // is also ok.)
-    const char* input = &buf[0];
-    assert(input[size] == 0);
-
-    // TODO: The rest of this function is designed for single-character
-    // input encodings only. It also assumes that the input the input
-    // encoding shares its first 128 values (0-127) with ASCII. These
-    // conditions must be checked again if you add more input encodings
-    // later.
-
-    // Compute output length, and check for pure ascii input at the same
-    // time.
-    bool ascii;
-    size_t outlen = getLength(arr, input, ascii);
-
-    // If we're pure ascii, then don't bother converting anything.
-    if(ascii)
-        return std::string(input, outlen);
-
-    // Make sure the output is large enough
-    resize(output, outlen);
-    char *out = &output[0];
-
-    // Translate
-    while(*input)
-        copyFromArray(arr, *(input++), out);
-
-    // Make sure that we wrote the correct number of bytes
-    assert((out-&output[0]) == (int)outlen);
-
-    // And make extra sure the output is null terminated
-    assert(output.size() > outlen);
-    assert(output[outlen] == 0);
-
-    // Return a string
-    return std::string(&output[0], outlen);
-}
-
-static size_t getLength2(const char *arr, const char* input, bool &ascii)
-{
-    ascii = true;
-    size_t len = 0;
-    const char* ptr = input;
-    unsigned char inp = *ptr;
-
-    // Do away with the ascii part of the string first (this is almost
-    // always the entire string.)
-    while(inp && inp < 128)
-        inp = *(++ptr);
-    len += (ptr-input);
-
-    // If we're not at the null terminator at this point, then there
-    // were some non-ascii characters to deal with. Go to slow-mode for
-    // the rest of the string.
-    if(inp)
-    {
-        ascii = false;
-        while(inp)
-        {
-            len += 1;
-            // Find the translated length of this character in the
-            // lookup table.
-            switch(inp)
-            {
-                case 0xe2: len -= 2; break;
-                case 0xc2:
-                case 0xcb:
-                case 0xc4:
-                case 0xc6:
-                case 0xc3:
-                case 0xd0:
-                case 0xd1:
-                case 0xd2:
-                case 0xc5: len -= 1; break;
-            }
-
-            inp = *(++ptr);
-        }
-    }
-    return len;
-}
-
-static void copyFromArray2(const char *arr, char*& chp, char* &out)
-{
-    unsigned char ch = *(chp++);
-    // Optimize for ASCII values
-    if(ch < 128)
-    {
-        *(out++) = ch;
-        return;
-    }
-
-    int len = 1;
-    switch (ch)
-    {
-        case 0xe2: len = 3; break;
-        case 0xc2:
-        case 0xcb:
-        case 0xc4:
-        case 0xc6:
-        case 0xc3:
-        case 0xd0:
-        case 0xd1:
-        case 0xd2:
-        case 0xc5: len = 2; break;
-    }
-
-    if (len == 1) // There is no 1 length utf-8 glyph that is not 0x20 (empty space)
-    {
-        *(out++) = ch;
-        return;
-    }
-
-    unsigned char ch2 = *(chp++);
-    unsigned char ch3 = '\0';
-    if (len == 3)
-        ch3 = *(chp++);
-
-    for (int i = 128; i < 256; i++)
-    {
-        unsigned char b1 = arr[i*6 + 1], b2 = arr[i*6 + 2], b3 = arr[i*6 + 3];
-        if (b1 == ch && b2 == ch2 && (len != 3 || b3 == ch3))
-        {
-            *(out++) = (char)i;
-            return;
-        }
-    }
-
-    std::cout << "Could not find glyph " << std::hex << (int)ch << " " << (int)ch2 << " " << (int)ch3 << std::endl;
-
-    *(out++) = ch; // Could not find glyph, just put whatever
-}
-
-std::string ToUTF8::getLegacyEnc(ToUTF8::FromType to)
-{
-    // Pick translation array
-    const char *arr;
-    switch (to)
-    {
-        case ToUTF8::WINDOWS_1252:
-        {
-            arr = ToUTF8::windows_1252;
-            break;
-        }
-        case ToUTF8::WINDOWS_1250:
-        {
-            arr = ToUTF8::windows_1250;
-            break;
-        }
-        case ToUTF8::WINDOWS_1251:
-        {
-            arr = ToUTF8::windows_1251;
-            break;
-        }
-        default:
-        {
-            assert(0);
-        }
-    }
-
-    // Double check that the input string stops at some point (it might
-    // contain zero terminators before this, inside its own data, which
-    // is also ok.)
-    char* input = &buf[0];
-    assert(input[size] == 0);
-
-    // TODO: The rest of this function is designed for single-character
-    // input encodings only. It also assumes that the input the input
-    // encoding shares its first 128 values (0-127) with ASCII. These
-    // conditions must be checked again if you add more input encodings
-    // later.
-
-    // Compute output length, and check for pure ascii input at the same
-    // time.
-    bool ascii;
-    size_t outlen = getLength2(arr, input, ascii);
-
-    // If we're pure ascii, then don't bother converting anything.
-    if(ascii)
-        return std::string(input, outlen);
-
-    // Make sure the output is large enough
-    resize(output, outlen);
-    char *out = &output[0];
-
-    // Translate
-    while(*input)
-        copyFromArray2(arr, input, out);
-
-    // Make sure that we wrote the correct number of bytes
-    assert((out-&output[0]) == (int)outlen);
-
-    // And make extra sure the output is null terminated
-    assert(output.size() > outlen);
-    assert(output[outlen] == 0);
-
-    // Return a string
-    return std::string(&output[0], outlen);
-}
-
 ToUTF8::FromType ToUTF8::calculateEncoding(const std::string& encodingName)
 {
     if (encodingName == "win1250")
diff --git a/components/to_utf8/to_utf8.hpp b/components/to_utf8/to_utf8.hpp
index 6877e2dc1..bfba8a1ac 100644
--- a/components/to_utf8/to_utf8.hpp
+++ b/components/to_utf8/to_utf8.hpp
@@ -16,15 +16,6 @@ namespace ToUTF8
             // probably others)
     };
 
-    // Return a writable buffer of at least 'size' bytes. The buffer
-    // does not have to be freed.
-    char* getBuffer(int size);
-
-    // Convert the previously written buffer to UTF8 from the given code
-    // page.
-    std::string getUtf8(FromType from);
-    std::string getLegacyEnc(FromType to);
-
     FromType calculateEncoding(const std::string& encodingName);
     std::string encodingUsingMessage(const std::string& encodingName);