components/to_utf8: add class Utf8Encoder

2025-12-13 21:13:06 +00:00 · 2013-01-02 23:02:13 +01:00 · 2013-01-02 23:02:13 +01:00 · 740e2b5769
commit 740e2b5769
parent 5c007cd527
2 changed files with 563 additions and 247 deletions
--- a/components/to_utf8/to_utf8.cpp
+++ b/components/to_utf8/to_utf8.cpp
@ -2,6 +2,8 @@
 #include <vector>
 #include <cassert>
 #include <iostream>
 #include <iomanip>
 /* This file contains the code to translate from WINDOWS-1252 (native
   charset used in English version of Morrowind) to UTF-8. The library
@ -46,334 +48,611 @@ static std::vector<char> buf    (50*1024);
 static std::vector<char> output (50*1024);
 static int size;
-// Make sure the given vector is large enough for 'size' bytes,
+using namespace ToUTF8;
 // Construct an encoder with a pre-allocated 50 KiB output buffer.
 //
 // Every data member is given a defined value here. The encoder starts
 // out configured for WINDOWS-1252, so a conversion performed before
 // setEncoding() is called reads a valid translation table instead of
 // dereferencing an uninitialised pointer. Call setEncoding() to select
 // another code page.
 Utf8Encoder::Utf8Encoder(void):
    mEncoding(ToUTF8::WINDOWS_1252),
    mOutput(50*1024),
    mSize(0),
    translationArray(ToUTF8::windows_1252)
 {
 }
 // Select the legacy code page used by subsequent conversions.
 //
 // Stores 'sourceEncoding' and points translationArray at the matching
 // lookup table. An encoding without a table trips an assertion.
 void Utf8Encoder::setEncoding(const FromType sourceEncoding)
 {
    mEncoding = sourceEncoding;

    if (sourceEncoding == ToUTF8::WINDOWS_1252)
        translationArray = ToUTF8::windows_1252;
    else if (sourceEncoding == ToUTF8::WINDOWS_1250)
        translationArray = ToUTF8::windows_1250;
    else if (sourceEncoding == ToUTF8::WINDOWS_1251)
        translationArray = ToUTF8::windows_1251;
    else
        assert(0); // no translation table for this encoding
 }
 // Convert 'input' from the currently selected code page to UTF-8.
 //
 // 'input' must have a zero byte at index 'size'. The conversion itself
 // processes bytes up to the first zero byte it encounters.
 std::string Utf8Encoder::getUtf8(const char* input, int size)
 {
    // Sanity check: the caller's buffer has to be terminated at 'size'.
    assert(input[size] == 0);

    // TODO: Everything below assumes a single-byte input encoding whose
    // first 128 values (0-127) coincide with ASCII. Re-check these
    // assumptions before adding further input encodings.

    // One pass over the input yields both the converted length and
    // whether the text is plain ASCII.
    bool isAscii;
    const size_t utf8Len = getLength(input, isAscii);

    // Plain ASCII is already valid UTF-8; hand it back untouched.
    if (isAscii)
        return std::string(input, utf8Len);

    // Grow the scratch buffer (this also writes the terminating zero).
    resize(utf8Len);
    char* const begin = &mOutput[0];
    char* cursor = begin;

    // Translate one input byte at a time.
    for (const char* src = input; *src; ++src)
        copyFromArray(*src, cursor);

    // The number of bytes written must match the precomputed length...
    assert((cursor - begin) == (int)utf8Len);

    // ...and the terminator placed by resize() must still be in place.
    assert(mOutput.size() > utf8Len);
    assert(mOutput[utf8Len] == 0);

    return std::string(begin, utf8Len);
 }
 // Convert UTF-8 'input' back to the currently selected legacy code page.
 //
 // 'input' must have a zero byte at index 'size'. The conversion itself
 // processes bytes up to the first zero byte it encounters.
 std::string Utf8Encoder::getLegacyEnc(const char *input, int size)
 {
    // Sanity check: the caller's buffer has to be terminated at 'size'.
    assert(input[size] == 0);

    // TODO: Everything below assumes that the legacy encoding uses one
    // byte per character and shares its first 128 values (0-127) with
    // ASCII. Re-check these assumptions before adding more encodings.

    // One pass over the input yields both the converted length and
    // whether the text is plain ASCII.
    bool isAscii;
    const size_t legacyLen = getLength2(input, isAscii);

    // Plain ASCII needs no conversion; hand it back untouched.
    if (isAscii)
        return std::string(input, legacyLen);

    // Grow the scratch buffer (this also writes the terminating zero).
    resize(legacyLen);
    char* const begin = &mOutput[0];
    char* cursor = begin;

    // copyFromArray2 advances the source pointer itself, by one whole
    // UTF-8 sequence per call.
    const char* src = input;
    while (*src)
        copyFromArray2(src, cursor);

    // The number of bytes written must match the precomputed length...
    assert((cursor - begin) == (int)legacyLen);

    // ...and the terminator placed by resize() must still be in place.
    assert(mOutput.size() > legacyLen);
    assert(mOutput[legacyLen] == 0);

    return std::string(begin, legacyLen);
 }
 // Make sure the output vector is large enough for 'size' bytes,
 // including a terminating zero after it.
 void Utf8Encoder::resize(size_t size)
 {
    if (mOutput.size() <= size)
        // Add some extra padding to reduce the chance of having to resize
        // again later. The "+ 1" guarantees room for the terminator even
        // when 'size' is 0 and the vector is empty, where 3*size alone
        // would leave index 'size' out of bounds.
        mOutput.resize(3*size + 1);

    // And make sure the string is zero terminated
    mOutput[size] = 0;
 }
 /** Get the total length needed to convert the given zero-terminated
  string with the current translation array. The array is encoded with
  6 bytes per character: the first byte gives the length and the next
  5 hold the actual data.

  For optimization reasons the function does double duty: it also
  reports, through 'ascii', whether the input is pure ascii (all
  values are <= 127), so that the caller can skip the conversion.
 */
 size_t Utf8Encoder::getLength(const char* input, bool &ascii)
 {
    const char* cursor = input;

    // Fast path: skip over the leading run of ascii bytes (this is
    // almost always the entire string).
    while (*cursor != 0 && static_cast<unsigned char>(*cursor) < 128)
        ++cursor;

    size_t total = 0;
    total += (cursor - input);

    // Stopping on the terminator means no non-ascii byte was seen.
    ascii = (*cursor == 0);

    // Slow path: from the first non-ascii byte onwards, look up the
    // translated length of every remaining byte in the table.
    for (; *cursor != 0; ++cursor)
    {
        const unsigned char value = *cursor;
        total += translationArray[value*6];
    }

    return total;
 }
 // Translate the single byte 'ch' through the current translation
 // array, writing the result at 'out' and advancing 'out' past it.
 void Utf8Encoder::copyFromArray(unsigned char ch, char* &out)
 {
    if (ch >= 128)
    {
        // Each table entry is 6 bytes wide: a length byte followed by
        // the translated bytes.
        const char* entry = translationArray + ch*6;
        const int count = entry[0];
        for (int k = 1; k <= count; ++k)
            *(out++) = entry[k];
        return;
    }

    // ASCII values map to themselves.
    *(out++) = ch;
 }
 /** Get the number of bytes that converting the zero-terminated UTF-8
  string 'input' to the legacy encoding will produce.

  The input is walked exactly the way copyFromArray2() consumes it: a
  recognised lead byte together with its continuation bytes forms one
  sequence, and every sequence (or any other single byte) yields one
  output byte. Counting per sequence rather than per byte keeps the
  result in step with the bytes actually written, also for malformed
  input: a sequence cut short by the terminator no longer wraps the
  unsigned length, and back-to-back lead bytes are no longer
  under-counted.

  'ascii' is set to true when the input is pure ascii (all values are
  <= 127), so that the caller can skip the conversion.
 */
 size_t Utf8Encoder::getLength2(const char* input, bool &ascii)
 {
    ascii = true;
    size_t len = 0;
    const char* ptr = input;
    unsigned char inp = *ptr;

    // Do away with the ascii part of the string first (this is almost
    // always the entire string.)
    while (inp && inp < 128)
        inp = *(++ptr);
    len += (ptr-input);

    // If we're not at the null terminator at this point, then there
    // were some non-ascii characters to deal with. Go to slow-mode for
    // the rest of the string.
    if (inp)
    {
        ascii = false;
        while (inp)
        {
            // How many input bytes does the sequence starting here span?
            int seqLen = 1;
            switch (inp)
            {
                case 0xe2: seqLen = 3; break;
                case 0xc2:
                case 0xcb:
                case 0xc4:
                case 0xc6:
                case 0xc3:
                case 0xd0:
                case 0xd1:
                case 0xd2:
                case 0xc5: seqLen = 2; break;
            }

            // Whatever its input length, the sequence becomes exactly
            // one output byte.
            len += 1;

            // Step over the lead byte and its continuation bytes, but
            // never past the terminating zero.
            ++ptr;
            for (int i = 1; i < seqLen && *ptr; ++i)
                ++ptr;

            inp = *ptr;
        }
    }
    return len;
 }
 // Convert the UTF-8 sequence starting at 'chp' into one byte of the
 // legacy encoding, write it at 'out', and advance both pointers.
 //
 // The sequence is matched against the UTF-8 bytes stored in the
 // translation array; the index of the matching entry is the legacy
 // byte. If no entry matches, a diagnostic is printed and the lead byte
 // is written unchanged.
 //
 // 'chp' is never moved past a zero byte: when a sequence is cut short
 // by the string terminator, the terminator is left for the caller to
 // see, so the caller's loop stops instead of reading beyond the end of
 // the input.
 void Utf8Encoder::copyFromArray2(const char*& chp, char* &out)
 {
    unsigned char ch = *(chp++);

    // Optimize for ASCII values
    if (ch < 128)
    {
        *(out++) = ch;
        return;
    }

    // Length of the sequence as implied by its lead byte.
    int len = 1;
    switch (ch)
    {
        case 0xe2: len = 3; break;
        case 0xc2:
        case 0xcb:
        case 0xc4:
        case 0xc6:
        case 0xc3:
        case 0xd0:
        case 0xd1:
        case 0xd2:
        case 0xc5: len = 2; break;
    }

    // Not a lead byte we know about: pass it through unchanged.
    if (len == 1)
    {
        *(out++) = ch;
        return;
    }

    // Fetch the continuation bytes, consuming each one only if it is
    // not the terminating zero.
    unsigned char ch2 = *chp;
    if (ch2 != 0)
        ++chp;

    unsigned char ch3 = '\0';
    if (len == 3 && ch2 != 0)
    {
        ch3 = *chp;
        if (ch3 != 0)
            ++chp;
    }

    // Reverse lookup: find the table entry whose UTF-8 bytes match.
    for (int i = 128; i < 256; i++)
    {
        unsigned char b1 = translationArray[i*6 + 1], b2 = translationArray[i*6 + 2], b3 = translationArray[i*6 + 3];
        if (b1 == ch && b2 == ch2 && (len != 3 || b3 == ch3))
        {
            *(out++) = (char)i;
            return;
        }
    }

    // std::hex is sticky, so restore the stream's formatting afterwards
    // to keep later integer output on std::cout in its previous base.
    const std::ios_base::fmtflags savedFlags = std::cout.flags();
    std::cout << "Could not find glyph " << std::hex << (int)ch << " " << (int)ch2 << " " << (int)ch3 << std::endl;
    std::cout.flags(savedFlags);

    *(out++) = ch; // Could not find glyph, just put whatever
 }
 static void resize(std::vector<char> &buf, size_t size)
 {
-  if(buf.size() <= size)
+    if(buf.size() <= size)
-    // Add some extra padding to reduce the chance of having to resize
+        // Add some extra padding to reduce the chance of having to resize
-    // again later.
+        // again later.
-    buf.resize(3*size);
+        buf.resize(3*size);
-  // And make sure the string is zero terminated
+    // And make sure the string is zero terminated
-  buf[size] = 0;
+    buf[size] = 0;
 }
 // This is just used to spew out a reusable input buffer for the
 // conversion process.
 char *ToUTF8::getBuffer(int s)
 {
-  // Remember the requested size
+    // Remember the requested size
-  size = s;
+    size = s;
-  resize(buf, size);
+    resize(buf, size);
-  return &buf[0];
+    return &buf[0];
 }
 /** Get the total length length needed to decode the given string with
-    the given translation array. The arrays are encoded with 6 bytes
+  the given translation array. The arrays are encoded with 6 bytes
-    per character, with the first giving the length and the next 5 the
+  per character, with the first giving the length and the next 5 the
-    actual data.
+  actual data.
-    The function serves a dual purpose for optimization reasons: it
+  The function serves a dual purpose for optimization reasons: it
-    checks if the input is pure ascii (all values are <= 127). If this
+  checks if the input is pure ascii (all values are <= 127). If this
-    is the case, then the ascii parameter is set to true, and the
+  is the case, then the ascii parameter is set to true, and the
-    caller can optimize for this case.
+  caller can optimize for this case.
 */
 static size_t getLength(const char *arr, const char* input, bool &ascii)
 {
-  ascii = true;
+    ascii = true;
-  size_t len = 0;
+    size_t len = 0;
-  const char* ptr = input;
+    const char* ptr = input;
-  unsigned char inp = *ptr;
+    unsigned char inp = *ptr;
-  // Do away with the ascii part of the string first (this is almost
+    // Do away with the ascii part of the string first (this is almost
-  // always the entire string.)
+    // always the entire string.)
-  while(inp && inp < 128)
+    while(inp && inp < 128)
-    inp = *(++ptr);
+        inp = *(++ptr);
-  len += (ptr-input);
+    len += (ptr-input);
-  // If we're not at the null terminator at this point, then there
+    // If we're not at the null terminator at this point, then there
-  // were some non-ascii characters to deal with. Go to slow-mode for
+    // were some non-ascii characters to deal with. Go to slow-mode for
-  // the rest of the string.
+    // the rest of the string.
-  if(inp)
+    if(inp)
    {
-      ascii = false;
+        ascii = false;
-      while(inp)
+        while(inp)
        {
-          // Find the translated length of this character in the
+            // Find the translated length of this character in the
-          // lookup table.
+            // lookup table.
-          len += arr[inp*6];
+            len += arr[inp*6];
-          inp = *(++ptr);
+            inp = *(++ptr);
        }
    }
-  return len;
+    return len;
 }
 // Translate one character 'ch' using the translation array 'arr', and
 // advance the output pointer accordingly.
 static void copyFromArray(const char *arr, unsigned char ch, char* &out)
 {
-  // Optimize for ASCII values
+    // Optimize for ASCII values
-  if(ch < 128)
+    if(ch < 128)
    {
-      *(out++) = ch;
+        *(out++) = ch;
-      return;
+        return;
    }
-  const char *in = arr + ch*6;
+    const char *in = arr + ch*6;
-  int len = *(in++);
+    int len = *(in++);
-  for(int i=0; i<len; i++)
+    for(int i=0; i<len; i++)
-    *(out++) = *(in++);
+        *(out++) = *(in++);
 }
 std::string ToUTF8::getUtf8(ToUTF8::FromType from)
 {
-  // Pick translation array
+    // Pick translation array
-  const char *arr;
+    const char *arr;
-  switch (from)
+    switch (from)
  {
    case ToUTF8::WINDOWS_1252:
    {
-      arr = ToUTF8::windows_1252;
+        case ToUTF8::WINDOWS_1252:
-      break;
+        {
            arr = ToUTF8::windows_1252;
            break;
        }
        case ToUTF8::WINDOWS_1250:
        {
            arr = ToUTF8::windows_1250;
            break;
        }
        case ToUTF8::WINDOWS_1251:
        {
            arr = ToUTF8::windows_1251;
            break;
        }
        default:
        {
            assert(0);
        }
    }
    case ToUTF8::WINDOWS_1250:
    {
      arr = ToUTF8::windows_1250;
      break;
    }
    case ToUTF8::WINDOWS_1251:
    {
      arr = ToUTF8::windows_1251;
      break;
    }
    default:
    {
      assert(0);
    }
  }
-  // Double check that the input string stops at some point (it might
+    // Double check that the input string stops at some point (it might
-  // contain zero terminators before this, inside its own data, which
+    // contain zero terminators before this, inside its own data, which
-  // is also ok.)
+    // is also ok.)
-  const char* input = &buf[0];
+    const char* input = &buf[0];
-  assert(input[size] == 0);
+    assert(input[size] == 0);
-  // TODO: The rest of this function is designed for single-character
+    // TODO: The rest of this function is designed for single-character
-  // input encodings only. It also assumes that the input the input
+    // input encodings only. It also assumes that the input the input
-  // encoding shares its first 128 values (0-127) with ASCII. These
+    // encoding shares its first 128 values (0-127) with ASCII. These
-  // conditions must be checked again if you add more input encodings
+    // conditions must be checked again if you add more input encodings
-  // later.
+    // later.
-  // Compute output length, and check for pure ascii input at the same
+    // Compute output length, and check for pure ascii input at the same
-  // time.
+    // time.
-  bool ascii;
+    bool ascii;
-  size_t outlen = getLength(arr, input, ascii);
+    size_t outlen = getLength(arr, input, ascii);
-  // If we're pure ascii, then don't bother converting anything.
+    // If we're pure ascii, then don't bother converting anything.
-  if(ascii)
+    if(ascii)
-    return std::string(input, outlen);
+        return std::string(input, outlen);
-  // Make sure the output is large enough
+    // Make sure the output is large enough
-  resize(output, outlen);
+    resize(output, outlen);
-  char *out = &output[0];
+    char *out = &output[0];
-  // Translate
+    // Translate
-  while(*input)
+    while(*input)
-    copyFromArray(arr, *(input++), out);
+        copyFromArray(arr, *(input++), out);
-  // Make sure that we wrote the correct number of bytes
+    // Make sure that we wrote the correct number of bytes
-  assert((out-&output[0]) == (int)outlen);
+    assert((out-&output[0]) == (int)outlen);
-  // And make extra sure the output is null terminated
+    // And make extra sure the output is null terminated
-  assert(output.size() > outlen);
+    assert(output.size() > outlen);
-  assert(output[outlen] == 0);
+    assert(output[outlen] == 0);
-  // Return a string
+    // Return a string
-  return std::string(&output[0], outlen);
+    return std::string(&output[0], outlen);
 }
 static size_t getLength2(const char *arr, const char* input, bool &ascii)
 {
-  ascii = true;
+    ascii = true;
-  size_t len = 0;
+    size_t len = 0;
-  const char* ptr = input;
+    const char* ptr = input;
-  unsigned char inp = *ptr;
+    unsigned char inp = *ptr;
-  // Do away with the ascii part of the string first (this is almost
+    // Do away with the ascii part of the string first (this is almost
-  // always the entire string.)
+    // always the entire string.)
-  while(inp && inp < 128)
+    while(inp && inp < 128)
-    inp = *(++ptr);
+        inp = *(++ptr);
-  len += (ptr-input);
+    len += (ptr-input);
-  // If we're not at the null terminator at this point, then there
+    // If we're not at the null terminator at this point, then there
-  // were some non-ascii characters to deal with. Go to slow-mode for
+    // were some non-ascii characters to deal with. Go to slow-mode for
-  // the rest of the string.
+    // the rest of the string.
-  if(inp)
+    if(inp)
    {
-      ascii = false;
+        ascii = false;
-      while(inp)
+        while(inp)
        {
            len += 1;
-          // Find the translated length of this character in the
+            // Find the translated length of this character in the
-          // lookup table.
+            // lookup table.
            switch(inp)
            {
-            case 0xe2: len -= 2; break;
+                case 0xe2: len -= 2; break;
-            case 0xc2:
+                case 0xc2:
-            case 0xcb:
+                case 0xcb:
-            case 0xc4:
+                case 0xc4:
-            case 0xc6:
+                case 0xc6:
-            case 0xc3:
+                case 0xc3:
-            case 0xd0:
+                case 0xd0:
-            case 0xd1:
+                case 0xd1:
-            case 0xd2:
+                case 0xd2:
-            case 0xc5: len -= 1; break;
+                case 0xc5: len -= 1; break;
            }
-          inp = *(++ptr);
+            inp = *(++ptr);
        }
    }
-  return len;
+    return len;
 }
 #include <iostream>
 #include <iomanip>
 static void copyFromArray2(const char *arr, char*& chp, char* &out)
 {
    unsigned char ch = *(chp++);
-  // Optimize for ASCII values
+    // Optimize for ASCII values
-  if(ch < 128)
+    if(ch < 128)
    {
-      *(out++) = ch;
+        *(out++) = ch;
-      return;
+        return;
    }
-  int len = 1;
+    int len = 1;
-  switch (ch)
+    switch (ch)
-  {
+    {
-  case 0xe2: len = 3; break;
+        case 0xe2: len = 3; break;
-  case 0xc2:
+        case 0xc2:
-  case 0xcb:
+        case 0xcb:
-  case 0xc4:
+        case 0xc4:
-  case 0xc6:
+        case 0xc6:
-  case 0xc3:
+        case 0xc3:
-  case 0xd0:
+        case 0xd0:
-  case 0xd1:
+        case 0xd1:
-  case 0xd2:
+        case 0xd2:
-  case 0xc5: len = 2; break;
+        case 0xc5: len = 2; break;
-  }
+    }
-  if (len == 1) // There is no 1 length utf-8 glyph that is not 0x20 (empty space)
+    if (len == 1) // There is no 1 length utf-8 glyph that is not 0x20 (empty space)
-  {
+    {
-      *(out++) = ch;
+        *(out++) = ch;
-      return;
+        return;
-  }
+    }
-  unsigned char ch2 = *(chp++);
+    unsigned char ch2 = *(chp++);
-  unsigned char ch3 = '\0';
+    unsigned char ch3 = '\0';
-  if (len == 3)
+    if (len == 3)
-      ch3 = *(chp++);
+        ch3 = *(chp++);
-  for (int i = 128; i < 256; i++)
+    for (int i = 128; i < 256; i++)
-  {
+    {
-      unsigned char b1 = arr[i*6 + 1], b2 = arr[i*6 + 2], b3 = arr[i*6 + 3];
+        unsigned char b1 = arr[i*6 + 1], b2 = arr[i*6 + 2], b3 = arr[i*6 + 3];
-      if (b1 == ch && b2 == ch2 && (len != 3 || b3 == ch3))
+        if (b1 == ch && b2 == ch2 && (len != 3 || b3 == ch3))
-      {
+        {
-          *(out++) = (char)i;
+            *(out++) = (char)i;
-          return;
+            return;
-      }
+        }
-  }
+    }
-  std::cout << "Could not find glyph " << std::hex << (int)ch << " " << (int)ch2 << " " << (int)ch3 << std::endl;
+    std::cout << "Could not find glyph " << std::hex << (int)ch << " " << (int)ch2 << " " << (int)ch3 << std::endl;
-  *(out++) = ch; // Could not find glyph, just put whatever
+    *(out++) = ch; // Could not find glyph, just put whatever
 }
 std::string ToUTF8::getLegacyEnc(ToUTF8::FromType to)
 {
-  // Pick translation array
+    // Pick translation array
-  const char *arr;
+    const char *arr;
-  switch (to)
+    switch (to)
  {
    case ToUTF8::WINDOWS_1252:
    {
-      arr = ToUTF8::windows_1252;
+        case ToUTF8::WINDOWS_1252:
-      break;
+        {
            arr = ToUTF8::windows_1252;
            break;
        }
        case ToUTF8::WINDOWS_1250:
        {
            arr = ToUTF8::windows_1250;
            break;
        }
        case ToUTF8::WINDOWS_1251:
        {
            arr = ToUTF8::windows_1251;
            break;
        }
        default:
        {
            assert(0);
        }
    }
    case ToUTF8::WINDOWS_1250:
    {
      arr = ToUTF8::windows_1250;
      break;
    }
    case ToUTF8::WINDOWS_1251:
    {
      arr = ToUTF8::windows_1251;
      break;
    }
    default:
    {
      assert(0);
    }
  }
-  // Double check that the input string stops at some point (it might
+    // Double check that the input string stops at some point (it might
-  // contain zero terminators before this, inside its own data, which
+    // contain zero terminators before this, inside its own data, which
-  // is also ok.)
+    // is also ok.)
-  char* input = &buf[0];
+    char* input = &buf[0];
-  assert(input[size] == 0);
+    assert(input[size] == 0);
-  // TODO: The rest of this function is designed for single-character
+    // TODO: The rest of this function is designed for single-character
-  // input encodings only. It also assumes that the input the input
+    // input encodings only. It also assumes that the input the input
-  // encoding shares its first 128 values (0-127) with ASCII. These
+    // encoding shares its first 128 values (0-127) with ASCII. These
-  // conditions must be checked again if you add more input encodings
+    // conditions must be checked again if you add more input encodings
-  // later.
+    // later.
-  // Compute output length, and check for pure ascii input at the same
+    // Compute output length, and check for pure ascii input at the same
-  // time.
+    // time.
-  bool ascii;
+    bool ascii;
-  size_t outlen = getLength2(arr, input, ascii);
+    size_t outlen = getLength2(arr, input, ascii);
-  // If we're pure ascii, then don't bother converting anything.
+    // If we're pure ascii, then don't bother converting anything.
-  if(ascii)
+    if(ascii)
-      return std::string(input, outlen);
+        return std::string(input, outlen);
-  // Make sure the output is large enough
+    // Make sure the output is large enough
-  resize(output, outlen);
+    resize(output, outlen);
-  char *out = &output[0];
+    char *out = &output[0];
-  // Translate
+    // Translate
-  while(*input)
+    while(*input)
-    copyFromArray2(arr, input, out);
+        copyFromArray2(arr, input, out);
-  // Make sure that we wrote the correct number of bytes
+    // Make sure that we wrote the correct number of bytes
-  assert((out-&output[0]) == (int)outlen);
+    assert((out-&output[0]) == (int)outlen);
-  // And make extra sure the output is null terminated
+    // And make extra sure the output is null terminated
-  assert(output.size() > outlen);
+    assert(output.size() > outlen);
-  assert(output[outlen] == 0);
+    assert(output[outlen] == 0);
-  // Return a string
+    // Return a string
-  return std::string(&output[0], outlen);
+    return std::string(&output[0], outlen);
 }
 ToUTF8::FromType ToUTF8::calculateEncoding(const std::string& encodingName)
 {
-  if (encodingName == "win1250")
+    if (encodingName == "win1250")
-    return ToUTF8::WINDOWS_1250;
+        return ToUTF8::WINDOWS_1250;
-  else if (encodingName == "win1251")
+    else if (encodingName == "win1251")
-    return ToUTF8::WINDOWS_1251;
+        return ToUTF8::WINDOWS_1251;
-  else
+    else
-    return ToUTF8::WINDOWS_1252;
+        return ToUTF8::WINDOWS_1252;
 }
 std::string ToUTF8::encodingUsingMessage(const std::string& encodingName)
 {
-  if (encodingName == "win1250")
+    if (encodingName == "win1250")
-    return "Using Central and Eastern European font encoding.";
+        return "Using Central and Eastern European font encoding.";
-  else if (encodingName == "win1251")
+    else if (encodingName == "win1251")
-    return "Using Cyrillic font encoding.";
+        return "Using Cyrillic font encoding.";
-  else
+    else
-    return "Using default (English) font encoding.";
+        return "Using default (English) font encoding.";
 }
--- a/components/to_utf8/to_utf8.hpp
+++ b/components/to_utf8/to_utf8.hpp
@ -2,29 +2,66 @@
 #define COMPONENTS_TOUTF8_H
 #include <string>
 #include <cstring>
 #include <vector>
 namespace ToUTF8
 {
-  // These are all the currently supported code pages
+    // These are all the currently supported code pages
-  enum FromType
+    enum FromType
    {
-      WINDOWS_1250,      // Central ane Eastern European languages
+        WINDOWS_1250,      // Central ane Eastern European languages
-      WINDOWS_1251,      // Cyrillic languages
+        WINDOWS_1251,      // Cyrillic languages
-      WINDOWS_1252       // Used by English version of Morrowind (and
+        WINDOWS_1252       // Used by English version of Morrowind (and
-                         // probably others)
+            // probably others)
    };
-  // Return a writable buffer of at least 'size' bytes. The buffer
+    // Return a writable buffer of at least 'size' bytes. The buffer
-  // does not have to be freed.
+    // does not have to be freed.
-  char* getBuffer(int size);
+    char* getBuffer(int size);
-  // Convert the previously written buffer to UTF8 from the given code
+    // Convert the previously written buffer to UTF8 from the given code
-  // page.
+    // page.
-  std::string getUtf8(FromType from);
+    std::string getUtf8(FromType from);
-  std::string getLegacyEnc(FromType to);
+    std::string getLegacyEnc(FromType to);
-  FromType calculateEncoding(const std::string& encodingName);
+    FromType calculateEncoding(const std::string& encodingName);
-  std::string encodingUsingMessage(const std::string& encodingName);
+    std::string encodingUsingMessage(const std::string& encodingName);
    // class
    class Utf8Encoder
    {
        public:
            Utf8Encoder(void);
            void setEncoding(const FromType sourceEncoding);
            // Convert to UTF8 from the previously given code page.
            std::string getUtf8(const char *input, int size);
            inline std::string getUtf8(const std::string &str)
            {
                return getUtf8(str.c_str(), str.size());
            }
            std::string getLegacyEnc(const char *input, int size);
            inline std::string getLegacyEnc(const std::string &str)
            {
                return getLegacyEnc(str.c_str(), str.size());
            }
        private:
            void resize(size_t size);
            size_t getLength(const char* input, bool &ascii);
            void copyFromArray(unsigned char chp, char* &out);
            size_t getLength2(const char* input, bool &ascii);
            void copyFromArray2(const char*& chp, char* &out);
            FromType mEncoding;
            std::vector<char> mOutput;
            int mSize;
            char* translationArray;
    };
 }
 #endif