openmw-tes3coop/components/to_utf8/to_utf8.hpp
Douglas Mencken df5919f2c5 Use `signed char' explicitly where needed. It is important because:
- It is implementation-dependent if plain `char' signed or not.
- C standard defines three *distinct* types: char, signed char,
  and unsigned char.
- Assuming that char is always unsigned or signed can lead to
  compile-time and run-time errors.

You can also use int8_t, but then it would be less obvious for developers
to never assume that char is always unsigned (or always signed).

Conflicts:

	components/esm/loadcell.hpp
2013-02-15 20:28:14 +01:00

54 lines
1.6 KiB
C++

#ifndef COMPONENTS_TOUTF8_H
#define COMPONENTS_TOUTF8_H
#include <string>
#include <cstring>
#include <vector>
namespace ToUTF8
{
// These are all the currently supported code pages
enum FromType
{
WINDOWS_1250, // Central ane Eastern European languages
WINDOWS_1251, // Cyrillic languages
WINDOWS_1252 // Used by English version of Morrowind (and
// probably others)
};
FromType calculateEncoding(const std::string& encodingName);
std::string encodingUsingMessage(const std::string& encodingName);
// class
class Utf8Encoder
{
public:
Utf8Encoder(FromType sourceEncoding);
// Convert to UTF8 from the previously given code page.
std::string getUtf8(const char *input, int size);
inline std::string getUtf8(const std::string &str)
{
return getUtf8(str.c_str(), str.size());
}
std::string getLegacyEnc(const char *input, int size);
inline std::string getLegacyEnc(const std::string &str)
{
return getLegacyEnc(str.c_str(), str.size());
}
private:
void resize(size_t size);
size_t getLength(const char* input, bool &ascii);
void copyFromArray(unsigned char chp, char* &out);
size_t getLength2(const char* input, bool &ascii);
void copyFromArray2(const char*& chp, char* &out);
std::vector<char> mOutput;
signed char* translationArray;
};
}
#endif