2010-08-18 16:45:44 +00:00
|
|
|
#ifndef COMPONENTS_TOUTF8_H
|
|
|
|
#define COMPONENTS_TOUTF8_H
|
|
|
|
|
2013-01-02 22:02:13 +00:00
|
|
|
#include <cstring>
|
2022-09-22 18:26:05 +00:00
|
|
|
#include <string>
|
2022-02-12 10:52:52 +00:00
|
|
|
#include <string_view>
|
2022-10-09 10:39:43 +00:00
|
|
|
#include <utility>
|
2010-08-18 16:45:44 +00:00
|
|
|
|
|
|
|
namespace ToUTF8
{
    /// Legacy single-byte code pages that can be converted to and from UTF-8.
    /// These are all the currently supported code pages.
    enum FromType
    {
        WINDOWS_1250, // Central and Eastern European languages
        WINDOWS_1251, // Cyrillic languages
        WINDOWS_1252, // Used by English version of Morrowind (and probably others)
        CP437 // Used for fonts (*.fnt) if data files encoding is 1252. Otherwise, uses the same encoding as the
              // data files.
    };

    /// Selects how an encoder enlarges the caller-supplied output buffer when it is too small.
    /// NOTE(review): the exact sizing rules live in the implementation file, which is not visible from this
    /// header; the per-value remarks below are inferred from the names only and should be confirmed there.
    enum class BufferAllocationPolicy
    {
        FitToRequiredSize, // presumably: resize to exactly the size the output needs
        UseGrowFactor, // presumably: over-allocate so that repeated conversions reallocate less often
    };

    /// Map a textual encoding name to the matching FromType value.
    /// NOTE(review): the accepted names and the behaviour for an unknown name are defined in the
    /// implementation file; check there before relying on either.
    FromType calculateEncoding(std::string_view encodingName);

    /// Build a message (returned as an owning std::string) about the encoding named by \a encodingName.
    /// NOTE(review): presumably a human-readable "using encoding ..." text for logs; confirm in the
    /// implementation file.
    std::string encodingUsingMessage(std::string_view encodingName);

    /// Converter between one legacy code page and UTF-8 that owns no output storage: every conversion
    /// writes into a buffer supplied by the caller, and all conversion functions are const.
    class StatelessUtf8Encoder
    {
    public:
        /// \param sourceEncoding Legacy code page this encoder converts from (getUtf8) and to (getLegacyEnc).
        explicit StatelessUtf8Encoder(FromType sourceEncoding);

        /// Convert to UTF8 from the previously given code page.
        /// Returns a view to passed buffer that will be resized to fit output if it's too small.
        /// \param input Text encoded in the code page given to the constructor.
        /// \param bufferAllocationPolicy How \a buffer is enlarged when it cannot hold the output.
        /// \param buffer Caller-owned storage for the converted text; may be resized by this call.
        std::string_view getUtf8(
            std::string_view input, BufferAllocationPolicy bufferAllocationPolicy, std::string& buffer) const;

        /// Convert from UTF-8 to sourceEncoding.
        /// Returns a view to passed buffer that will be resized to fit output if it's too small.
        /// \param input UTF-8 encoded text.
        /// \param bufferAllocationPolicy How \a buffer is enlarged when it cannot hold the output.
        /// \param buffer Caller-owned storage for the converted text; may be resized by this call.
        std::string_view getLegacyEnc(
            std::string_view input, BufferAllocationPolicy bufferAllocationPolicy, std::string& buffer) const;

    private:
        // The helpers below are only declared here; their definitions are expected in the implementation file.
        // NOTE(review): the meaning of the returned pair (presumably output length plus an "ASCII only" flag)
        // cannot be confirmed from this header.
        inline std::pair<std::size_t, bool> getLength(std::string_view input) const;
        inline void copyFromArray(unsigned char chp, char*& out) const;
        inline std::pair<std::size_t, bool> getLengthLegacyEnc(std::string_view input) const;
        inline void copyFromArrayLegacyEnc(
            std::string_view::iterator& chp, std::string_view::iterator end, char*& out) const;

        // Non-owning view of the translation table for the selected code page. Being const, it also makes the
        // class (and Utf8Encoder, which embeds it) non-assignable.
        // NOTE(review): std::basic_string_view<signed char> relies on std::char_traits<signed char>, which is
        // not one of the specializations the C++ standard requires; recent libc++ releases dropped the generic
        // template. Confirm toolchain support before upgrading, and change this together with the
        // implementation file if a different view type is needed.
        const std::basic_string_view<signed char> mTranslationArray;
    };

    /// Convenience wrapper around StatelessUtf8Encoder that supplies its own output buffer.
    /// Because the buffer is shared by all conversions, a returned view is only valid until the next
    /// getUtf8 or getLegacyEnc call on the same object.
    class Utf8Encoder
    {
    public:
        /// \param sourceEncoding Legacy code page this encoder converts from (getUtf8) and to (getLegacyEnc).
        explicit Utf8Encoder(FromType sourceEncoding);

        /// Convert to UTF8 from the previously given code page.
        /// Returns a view to the internal buffer, invalidated by the next getUtf8 or getLegacyEnc call, if
        /// input is not an ASCII-only string. Otherwise returns a view to the input.
        std::string_view getUtf8(std::string_view input);

        /// Convert from UTF-8 to sourceEncoding.
        /// Returns a view to the internal buffer, invalidated by the next getUtf8 or getLegacyEnc call, if
        /// input is not an ASCII-only string. Otherwise returns a view to the input.
        std::string_view getLegacyEnc(std::string_view input);

        /// Access the wrapped stateless encoder, e.g. to convert into a caller-owned buffer instead.
        /// The reference stays valid for as long as this Utf8Encoder is alive.
        const StatelessUtf8Encoder& getStatelessEncoder() const { return mImpl; }

    private:
        std::string mBuffer; // Output storage reused across conversions.
        StatelessUtf8Encoder mImpl; // Performs the actual conversion.
    };
}
|
|
|
|
|
|
|
|
#endif
|