mirror of
				https://github.com/TES3MP/openmw-tes3mp.git
				synced 2025-10-31 20:56:42 +00:00 
			
		
		
		
	Merge remote-tracking branch 'potatoesmaster/to_utf8-rewrite'
This commit is contained in:
		
						commit
						25815ab8f7
					
				
					 23 changed files with 423 additions and 365 deletions
				
			
		|  | @ -165,23 +165,12 @@ bool parseOptions (int argc, char** argv, Arguments &info) | |||
| 
 | ||||
|     // Font encoding settings
 | ||||
|     info.encoding = variables["encoding"].as<std::string>(); | ||||
|     if (info.encoding == "win1250") | ||||
|     if(info.encoding != "win1250" && info.encoding != "win1251" && info.encoding != "win1252") | ||||
|     { | ||||
|         std::cout << "Using Central and Eastern European font encoding." << std::endl; | ||||
|     } | ||||
|     else if (info.encoding == "win1251") | ||||
|     { | ||||
|         std::cout << "Using Cyrillic font encoding." << std::endl; | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         if(info.encoding != "win1252") | ||||
|         { | ||||
|             std::cout << info.encoding << " is not a valid encoding option." << std::endl; | ||||
|             info.encoding = "win1252"; | ||||
|         } | ||||
|         std::cout << "Using default (English) font encoding." << std::endl; | ||||
|         std::cout << info.encoding << " is not a valid encoding option." << std::endl; | ||||
|         info.encoding = "win1252"; | ||||
|     } | ||||
|     std::cout << ToUTF8::encodingUsingMessage(info.encoding) << std::endl; | ||||
| 
 | ||||
|     return true; | ||||
| } | ||||
|  | @ -262,7 +251,8 @@ void printRaw(ESM::ESMReader &esm) | |||
| int load(Arguments& info) | ||||
| { | ||||
|     ESM::ESMReader& esm = info.reader; | ||||
|     esm.setEncoding(ToUTF8::calculateEncoding(info.encoding)); | ||||
|     ToUTF8::Utf8Encoder encoder (ToUTF8::calculateEncoding(info.encoding)); | ||||
|     esm.setEncoder(&encoder); | ||||
| 
 | ||||
|     std::string filename = info.filename; | ||||
|     std::cout << "Loading file: " << filename << std::endl; | ||||
|  | @ -432,7 +422,8 @@ int clone(Arguments& info) | |||
|     std::cout << std::endl << "Saving records to: " << info.outname << "..." << std::endl; | ||||
| 
 | ||||
|     ESM::ESMWriter& esm = info.writer; | ||||
|     esm.setEncoding(info.encoding); | ||||
|     ToUTF8::Utf8Encoder encoder (ToUTF8::calculateEncoding(info.encoding)); | ||||
|     esm.setEncoder(&encoder); | ||||
|     esm.setAuthor(info.data.author); | ||||
|     esm.setDescription(info.data.description); | ||||
|     esm.setVersion(info.data.version); | ||||
|  |  | |||
|  | @ -272,7 +272,8 @@ void DataFilesModel::addMasters(const QString &path) | |||
|     foreach (const QString &path, dir.entryList()) { | ||||
|         try { | ||||
|             ESM::ESMReader fileReader; | ||||
|             fileReader.setEncoding(ToUTF8::calculateEncoding(mEncoding.toStdString())); | ||||
|             ToUTF8::Utf8Encoder encoder (ToUTF8::calculateEncoding(mEncoding.toStdString())); | ||||
|             fileReader.setEncoder(&encoder); | ||||
|             fileReader.open(dir.absoluteFilePath(path).toStdString()); | ||||
| 
 | ||||
|             ESM::ESMReader::MasterList mlist = fileReader.getMasters(); | ||||
|  | @ -335,7 +336,8 @@ void DataFilesModel::addPlugins(const QString &path) | |||
| 
 | ||||
|         try { | ||||
|             ESM::ESMReader fileReader; | ||||
|             fileReader.setEncoding(ToUTF8::calculateEncoding(mEncoding.toStdString())); | ||||
|             ToUTF8::Utf8Encoder encoder (ToUTF8::calculateEncoding(mEncoding.toStdString())); | ||||
|             fileReader.setEncoder(&encoder); | ||||
|             fileReader.open(dir.absoluteFilePath(path).toStdString()); | ||||
| 
 | ||||
|             ESM::ESMReader::MasterList mlist = fileReader.getMasters(); | ||||
|  |  | |||
|  | @ -649,11 +649,12 @@ MwIniImporter::multistrmap MwIniImporter::loadIniFile(std::string filename) { | |||
|     std::string section(""); | ||||
|     MwIniImporter::multistrmap map; | ||||
|     boost::iostreams::stream<boost::iostreams::file_source>file(filename.c_str()); | ||||
|     ToUTF8::Utf8Encoder encoder(mEncoding); | ||||
| 
 | ||||
|     std::string line; | ||||
|     while (std::getline(file, line)) { | ||||
| 
 | ||||
|         line = toUTF8(line); | ||||
|         line = encoder.getUtf8(line); | ||||
| 
 | ||||
|         // unify Unix-style and Windows file ending
 | ||||
|         if (!(line.empty()) && (line[line.length()-1]) == '\r') { | ||||
|  | @ -829,14 +830,6 @@ void MwIniImporter::writeToFile(boost::iostreams::stream<boost::iostreams::file_ | |||
|     } | ||||
| } | ||||
| 
 | ||||
| std::string MwIniImporter::toUTF8(const std::string &str) { | ||||
|     char *ptr = ToUTF8::getBuffer(str.length()); | ||||
|     strncpy(ptr, str.c_str(), str.length()); | ||||
| 
 | ||||
|     // Convert to UTF8 and return
 | ||||
|     return ToUTF8::getUtf8(mEncoding); | ||||
| } | ||||
| 
 | ||||
| void MwIniImporter::setInputEncoding(const ToUTF8::FromType &encoding) | ||||
| { | ||||
|   mEncoding = encoding; | ||||
|  |  | |||
|  | @ -8,7 +8,7 @@ | |||
| #include <vector> | ||||
| #include <exception> | ||||
| 
 | ||||
| #include "../../components/to_utf8/to_utf8.hpp" | ||||
| #include <components/to_utf8/to_utf8.hpp> | ||||
| 
 | ||||
| class MwIniImporter { | ||||
|   public: | ||||
|  |  | |||
|  | @ -331,11 +331,15 @@ void OMW::Engine::go() | |||
|     // cursor replacer (converts the cursor from the bsa so they can be used by mygui)
 | ||||
|     MWGui::CursorReplace replacer; | ||||
| 
 | ||||
|     // Create encoder
 | ||||
|     ToUTF8::Utf8Encoder encoder (mEncoding); | ||||
| 
 | ||||
|     // Create the world
 | ||||
|     mEnvironment.setWorld (new MWWorld::World (*mOgre, mFileCollections, mMaster, | ||||
|         mResDir, mCfgMgr.getCachePath(), mNewGame, mEncoding, mFallbackMap)); | ||||
|         mResDir, mCfgMgr.getCachePath(), mNewGame, &encoder, mFallbackMap)); | ||||
| 
 | ||||
|     //Load translation data
 | ||||
|     mTranslationDataStorage.setEncoder(&encoder); | ||||
|     mTranslationDataStorage.loadTranslationData(mFileCollections, mMaster); | ||||
| 
 | ||||
|     // Create window manager - this manages all the MW-specific GUI windows
 | ||||
|  | @ -494,7 +498,6 @@ void OMW::Engine::showFPS(int level) | |||
| void OMW::Engine::setEncoding(const ToUTF8::FromType& encoding) | ||||
| { | ||||
|     mEncoding = encoding; | ||||
|     mTranslationDataStorage.setEncoding (encoding); | ||||
| } | ||||
| 
 | ||||
| void OMW::Engine::setFallbackValues(std::map<std::string,std::string> fallbackMap) | ||||
|  |  | |||
|  | @ -170,7 +170,7 @@ namespace MWWorld | |||
|     World::World (OEngine::Render::OgreRenderer& renderer, | ||||
|         const Files::Collections& fileCollections, | ||||
|         const std::string& master, const boost::filesystem::path& resDir, const boost::filesystem::path& cacheDir, bool newGame, | ||||
|         const ToUTF8::FromType& encoding, std::map<std::string,std::string> fallbackMap) | ||||
|         ToUTF8::Utf8Encoder* encoder, std::map<std::string,std::string> fallbackMap) | ||||
|     : mPlayer (0), mLocalScripts (mStore), mGlobalVariables (0), | ||||
|       mSky (true), mCells (mStore, mEsm), | ||||
|       mNumFacing(0) | ||||
|  | @ -187,7 +187,7 @@ namespace MWWorld | |||
|         std::cout << "Loading ESM " << masterPath.string() << "\n"; | ||||
| 
 | ||||
|         // This parses the ESM file and loads a sample cell
 | ||||
|         mEsm.setEncoding(encoding); | ||||
|         mEsm.setEncoder(encoder); | ||||
|         mEsm.open (masterPath.string()); | ||||
|         mStore.load (mEsm); | ||||
| 
 | ||||
|  |  | |||
|  | @ -95,7 +95,7 @@ namespace MWWorld | |||
|             World (OEngine::Render::OgreRenderer& renderer, | ||||
|                 const Files::Collections& fileCollections, | ||||
|                 const std::string& master, const boost::filesystem::path& resDir, const boost::filesystem::path& cacheDir, bool newGame, | ||||
|                 const ToUTF8::FromType& encoding, std::map<std::string,std::string> fallbackMap); | ||||
|                 ToUTF8::Utf8Encoder* encoder, std::map<std::string,std::string> fallbackMap); | ||||
| 
 | ||||
|             virtual ~World(); | ||||
| 
 | ||||
|  |  | |||
|  | @ -15,6 +15,11 @@ ESM_Context ESMReader::getContext() | |||
|     return mCtx; | ||||
| } | ||||
| 
 | ||||
| ESMReader::ESMReader(void): | ||||
|     mBuffer(50*1024) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| void ESMReader::restoreContext(const ESM_Context &rc) | ||||
| { | ||||
|     // Reopen the file if necessary
 | ||||
|  | @ -323,11 +328,21 @@ void ESMReader::getExact(void*x, int size) | |||
| 
 | ||||
| std::string ESMReader::getString(int size) | ||||
| { | ||||
|     char *ptr = ToUTF8::getBuffer(size); | ||||
|     mEsm->read(ptr, size); | ||||
|     size_t s = size; | ||||
|     if (mBuffer.size() <= s) | ||||
|         // Add some extra padding to reduce the chance of having to resize
 | ||||
|         // again later.
 | ||||
|         mBuffer.resize(3*s); | ||||
| 
 | ||||
|     // And make sure the string is zero terminated
 | ||||
|     mBuffer[s] = 0; | ||||
| 
 | ||||
|     // read ESM data
 | ||||
|     char *ptr = &mBuffer[0]; | ||||
|     getExact(ptr, size); | ||||
| 
 | ||||
|     // Convert to UTF8 and return
 | ||||
|     return ToUTF8::getUtf8(mEncoding); | ||||
|     return mEncoder->getUtf8(ptr, size); | ||||
| } | ||||
| 
 | ||||
| void ESMReader::fail(const std::string &msg) | ||||
|  | @ -345,9 +360,9 @@ void ESMReader::fail(const std::string &msg) | |||
|     throw std::runtime_error(ss.str()); | ||||
| } | ||||
| 
 | ||||
| void ESMReader::setEncoding(const ToUTF8::FromType& encoding) | ||||
| void ESMReader::setEncoder(ToUTF8::Utf8Encoder* encoder) | ||||
| { | ||||
|   mEncoding = encoding; | ||||
|     mEncoder = encoder; | ||||
| } | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -20,6 +20,8 @@ class ESMReader | |||
| { | ||||
| public: | ||||
| 
 | ||||
|   ESMReader(void); | ||||
| 
 | ||||
|   /*************************************************************************
 | ||||
|    * | ||||
|    *  Public type definitions | ||||
|  | @ -233,8 +235,8 @@ public: | |||
|   /// Used for error handling
 | ||||
|   void fail(const std::string &msg); | ||||
| 
 | ||||
|   /// Sets font encoding for ESM strings
 | ||||
|   void setEncoding(const ToUTF8::FromType& encoding); | ||||
|   /// Sets font encoder for ESM strings
 | ||||
|   void setEncoder(ToUTF8::Utf8Encoder* encoder); | ||||
| 
 | ||||
| private: | ||||
|   Ogre::DataStreamPtr mEsm; | ||||
|  | @ -244,9 +246,12 @@ private: | |||
|   // Special file signifier (see SpecialFile enum above)
 | ||||
|   int mSpf; | ||||
| 
 | ||||
|   // Buffer for ESM strings
 | ||||
|   std::vector<char> mBuffer; | ||||
| 
 | ||||
|   SaveData mSaveData; | ||||
|   MasterList mMasters; | ||||
|   ToUTF8::FromType mEncoding; | ||||
|   ToUTF8::Utf8Encoder* mEncoder; | ||||
| }; | ||||
| } | ||||
| #endif | ||||
|  |  | |||
|  | @ -157,12 +157,8 @@ void ESMWriter::writeHString(const std::string& data) | |||
|         write("\0", 1); | ||||
|     else | ||||
|     { | ||||
|         char *ptr = ToUTF8::getBuffer(data.size()+1); | ||||
|         strncpy(ptr, &data[0], data.size()); | ||||
|         ptr[data.size()] = '\0'; | ||||
| 
 | ||||
|         // Convert to UTF8 and return
 | ||||
|         std::string ascii = ToUTF8::getLegacyEnc(m_encoding); | ||||
|         std::string ascii = m_encoder->getLegacyEnc(data); | ||||
| 
 | ||||
|         write(ascii.c_str(), ascii.size()); | ||||
|     } | ||||
|  | @ -192,21 +188,9 @@ void ESMWriter::write(const char* data, int size) | |||
|     m_stream->write(data, size); | ||||
| } | ||||
| 
 | ||||
| void ESMWriter::setEncoding(const std::string& encoding) | ||||
| void ESMWriter::setEncoder(ToUTF8::Utf8Encoder* encoder) | ||||
| { | ||||
|     if (encoding == "win1250") | ||||
|     { | ||||
|         m_encoding = ToUTF8::WINDOWS_1250; | ||||
|     } | ||||
|     else if (encoding == "win1251") | ||||
|     { | ||||
|         m_encoding = ToUTF8::WINDOWS_1251; | ||||
|     } | ||||
|     else | ||||
|     { | ||||
|         // Default Latin encoding
 | ||||
|         m_encoding = ToUTF8::WINDOWS_1252; | ||||
|     } | ||||
|     m_encoder = encoder; | ||||
| } | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -6,7 +6,7 @@ | |||
| #include <assert.h> | ||||
| 
 | ||||
| #include "esmcommon.hpp" | ||||
| #include "../to_utf8/to_utf8.hpp" | ||||
| #include <components/to_utf8/to_utf8.hpp> | ||||
| 
 | ||||
| namespace ESM { | ||||
| 
 | ||||
|  | @ -24,7 +24,7 @@ public: | |||
|     void setVersion(int ver); | ||||
|     int getType(); | ||||
|     void setType(int type); | ||||
|     void setEncoding(const std::string& encoding); // Write strings as UTF-8?
 | ||||
|     void setEncoder(ToUTF8::Utf8Encoder *encoding); // Write strings as UTF-8?
 | ||||
|     void setAuthor(const std::string& author); | ||||
|     void setDescription(const std::string& desc); | ||||
| 
 | ||||
|  | @ -94,11 +94,10 @@ private: | |||
|     std::list<RecordData> m_records; | ||||
|     std::ostream* m_stream; | ||||
|     std::streampos m_headerPos; | ||||
|     ToUTF8::FromType m_encoding; | ||||
|     ToUTF8::Utf8Encoder* m_encoder; | ||||
|     int m_recordCount; | ||||
| 
 | ||||
|     HEDRstruct m_header; | ||||
|     SaveData m_saveData; | ||||
| }; | ||||
| 
 | ||||
| } | ||||
|  |  | |||
							
								
								
									
										1
									
								
								components/to_utf8/tests/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								components/to_utf8/tests/.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							|  | @ -0,0 +1 @@ | |||
| *_test | ||||
							
								
								
									
										4
									
								
								components/to_utf8/tests/output/to_utf8_test.out
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								components/to_utf8/tests/output/to_utf8_test.out
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,4 @@ | |||
| original:  Без вопросов отдаете ему рулет, зная, что позже вы сможете привести с собой своих друзей и тогда он получит по заслугам? | ||||
| converted: Без вопросов отдаете ему рулет, зная, что позже вы сможете привести с собой своих друзей и тогда он получит по заслугам? | ||||
| original:  Vous lui donnez le gâteau sans protester avant d’aller chercher tous vos amis et de revenir vous venger. | ||||
| converted: Vous lui donnez le gâteau sans protester avant d’aller chercher tous vos amis et de revenir vous venger. | ||||
							
								
								
									
										18
									
								
								components/to_utf8/tests/test.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										18
									
								
								components/to_utf8/tests/test.sh
									
									
									
									
									
										Executable file
									
								
							|  | @ -0,0 +1,18 @@ | |||
#!/bin/bash
#
# Builds the *_test programs in this directory and compares the stdout of
# each one with its recorded reference in output/<name>.out. If a program
# has no reference yet, its current output is recorded and staged with git.
#
# Exit status: make's status if the build fails; 1 if any test program
# fails or its output differs from the reference; 0 otherwise.

make || exit

# Let a crashing test program fail the "program | diff" pipeline even when
# the text it managed to print still matches the reference.
set -o pipefail

mkdir -p output

status=0

for a in *_test; do
    # With no matching file the glob stays a literal "*_test"; skip it.
    [ -e "$a" ] || continue

    if [ -f "output/$a.out" ]; then
        echo "Running $a:"
        "./$a" | diff "output/$a.out" - || status=1
    else
        echo "Creating $a.out"
        "./$a" > "output/$a.out"
        git add "output/$a.out"
    fi
done

exit $status
							
								
								
									
										1
									
								
								components/to_utf8/tests/test_data/french-utf8.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								components/to_utf8/tests/test_data/french-utf8.txt
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1 @@ | |||
| Vous lui donnez le gâteau sans protester avant d’aller chercher tous vos amis et de revenir vous venger. | ||||
							
								
								
									
										1
									
								
								components/to_utf8/tests/test_data/french-win1252.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								components/to_utf8/tests/test_data/french-win1252.txt
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1 @@ | |||
| Vous lui donnez le gâteau sans protester avant d’aller chercher tous vos amis et de revenir vous venger. | ||||
							
								
								
									
										1
									
								
								components/to_utf8/tests/test_data/russian-utf8.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								components/to_utf8/tests/test_data/russian-utf8.txt
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1 @@ | |||
| Без вопросов отдаете ему рулет, зная, что позже вы сможете привести с собой своих друзей и тогда он получит по заслугам? | ||||
							
								
								
									
										1
									
								
								components/to_utf8/tests/test_data/russian-win1251.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								components/to_utf8/tests/test_data/russian-win1251.txt
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1 @@ | |||
| Без вопросов отдаете ему рулет, зная, что позже вы сможете привести с собой своих друзей и тогда он получит по заслугам? | ||||
							
								
								
									
										59
									
								
								components/to_utf8/tests/to_utf8_test.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								components/to_utf8/tests/to_utf8_test.cpp
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,59 @@ | |||
| #include <iostream> | ||||
| #include <fstream> | ||||
| #include <cassert> | ||||
| #include <stdexcept> | ||||
| 
 | ||||
| #include "../to_utf8.hpp" | ||||
| 
 | ||||
| std::string getFirstLine(const std::string &filename); | ||||
| void testEncoder(ToUTF8::FromType encoding, const std::string &legacyEncFile, | ||||
|                  const std::string &utf8File); | ||||
| 
 | ||||
| /// Test character encoding conversion to and from UTF-8
 | ||||
| void testEncoder(ToUTF8::FromType encoding, const std::string &legacyEncFile, | ||||
|                  const std::string &utf8File) | ||||
| { | ||||
|     // get some test data
 | ||||
|     std::string legacyEncLine = getFirstLine(legacyEncFile); | ||||
|     std::string utf8Line = getFirstLine(utf8File); | ||||
| 
 | ||||
|     // create an encoder for specified character encoding
 | ||||
|     ToUTF8::Utf8Encoder encoder (encoding); | ||||
| 
 | ||||
|     // convert text to UTF-8
 | ||||
|     std::string convertedUtf8Line = encoder.getUtf8(legacyEncLine); | ||||
| 
 | ||||
|     std::cout << "original:  " << utf8Line          << std::endl; | ||||
|     std::cout << "converted: " << convertedUtf8Line << std::endl; | ||||
| 
 | ||||
|     // check correctness
 | ||||
|     assert(convertedUtf8Line == utf8Line); | ||||
| 
 | ||||
|     // convert UTF-8 text to legacy encoding
 | ||||
|     std::string convertedLegacyEncLine = encoder.getLegacyEnc(utf8Line); | ||||
|     // check correctness
 | ||||
|     assert(convertedLegacyEncLine == legacyEncLine); | ||||
| } | ||||
| 
 | ||||
/// Return the first line of the named file, without its line terminator.
///
/// \throws std::runtime_error ("Unable to open file <filename>") if the
///         file cannot be opened.
std::string getFirstLine(const std::string &filename)
{
    std::ifstream input (filename.c_str());

    if (input.is_open())
    {
        std::string firstLine;
        std::getline(input, firstLine);

        // The stream is closed by its destructor on return.
        return firstLine;
    }

    throw std::runtime_error("Unable to open file " + filename);
}
| 
 | ||||
| int main() | ||||
| { | ||||
|     testEncoder(ToUTF8::WINDOWS_1251, "test_data/russian-win1251.txt", "test_data/russian-utf8.txt"); | ||||
|     testEncoder(ToUTF8::WINDOWS_1252, "test_data/french-win1252.txt", "test_data/french-utf8.txt"); | ||||
|     return 0; | ||||
| } | ||||
|  | @ -2,6 +2,8 @@ | |||
| 
 | ||||
| #include <vector> | ||||
| #include <cassert> | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| 
 | ||||
| /* This file contains the code to translate from WINDOWS-1252 (native
 | ||||
|    charset used in English version of Morrowind) to UTF-8. The library | ||||
|  | @ -39,341 +41,298 @@ | |||
| // Generated tables
 | ||||
| #include "tables_gen.hpp" | ||||
| 
 | ||||
| // Shared global buffers, we love you. These initial sizes are large
 | ||||
| // enough to hold the largest books in Morrowind.esm, but we will
 | ||||
| // resize automatically if necessary.
 | ||||
| static std::vector<char> buf    (50*1024); | ||||
| static std::vector<char> output (50*1024); | ||||
| static int size; | ||||
| using namespace ToUTF8; | ||||
| 
 | ||||
| // Make sure the given vector is large enough for 'size' bytes,
 | ||||
| // including a terminating zero after it.
 | ||||
| static void resize(std::vector<char> &buf, size_t size) | ||||
| Utf8Encoder::Utf8Encoder(const FromType sourceEncoding): | ||||
|     mOutput(50*1024) | ||||
| { | ||||
|   if(buf.size() <= size) | ||||
|     // Add some extra padding to reduce the chance of having to resize
 | ||||
|     // again later.
 | ||||
|     buf.resize(3*size); | ||||
| 
 | ||||
|   // And make sure the string is zero terminated
 | ||||
|   buf[size] = 0; | ||||
|     switch (sourceEncoding) | ||||
|     { | ||||
|         case ToUTF8::WINDOWS_1252: | ||||
|         { | ||||
|             translationArray = ToUTF8::windows_1252; | ||||
|             break; | ||||
|         } | ||||
|         case ToUTF8::WINDOWS_1250: | ||||
|         { | ||||
|             translationArray = ToUTF8::windows_1250; | ||||
|             break; | ||||
|         } | ||||
|         case ToUTF8::WINDOWS_1251: | ||||
|         { | ||||
|             translationArray = ToUTF8::windows_1251; | ||||
|             break; | ||||
|         } | ||||
|         default: | ||||
|         { | ||||
|             assert(0); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // This is just used to spew out a reusable input buffer for the
 | ||||
| // conversion process.
 | ||||
| char *ToUTF8::getBuffer(int s) | ||||
| std::string Utf8Encoder::getUtf8(const char* input, int size) | ||||
| { | ||||
|   // Remember the requested size
 | ||||
|   size = s; | ||||
|   resize(buf, size); | ||||
|   return &buf[0]; | ||||
|     // Double check that the input string stops at some point (it might
 | ||||
|     // contain zero terminators before this, inside its own data, which
 | ||||
|     // is also ok.)
 | ||||
|     assert(input[size] == 0); | ||||
| 
 | ||||
|     // TODO: The rest of this function is designed for single-character
 | ||||
|     // input encodings only. It also assumes that the input
 | ||||
|     // encoding shares its first 128 values (0-127) with ASCII. These
 | ||||
|     // conditions must be checked again if you add more input encodings
 | ||||
|     // later.
 | ||||
| 
 | ||||
|     // Compute output length, and check for pure ascii input at the same
 | ||||
|     // time.
 | ||||
|     bool ascii; | ||||
|     size_t outlen = getLength(input, ascii); | ||||
| 
 | ||||
|     // If we're pure ascii, then don't bother converting anything.
 | ||||
|     if(ascii) | ||||
|         return std::string(input, outlen); | ||||
| 
 | ||||
|     // Make sure the output is large enough
 | ||||
|     resize(outlen); | ||||
|     char *out = &mOutput[0]; | ||||
| 
 | ||||
|     // Translate
 | ||||
|     while (*input) | ||||
|         copyFromArray(*(input++), out); | ||||
| 
 | ||||
|     // Make sure that we wrote the correct number of bytes
 | ||||
|     assert((out-&mOutput[0]) == (int)outlen); | ||||
| 
 | ||||
|     // And make extra sure the output is null terminated
 | ||||
|     assert(mOutput.size() > outlen); | ||||
|     assert(mOutput[outlen] == 0); | ||||
| 
 | ||||
|     // Return a string
 | ||||
|     return std::string(&mOutput[0], outlen); | ||||
| } | ||||
| 
 | ||||
| std::string Utf8Encoder::getLegacyEnc(const char *input, int size) | ||||
| { | ||||
|     // Double check that the input string stops at some point (it might
 | ||||
|     // contain zero terminators before this, inside its own data, which
 | ||||
|     // is also ok.)
 | ||||
|     assert(input[size] == 0); | ||||
| 
 | ||||
|     // TODO: The rest of this function is designed for single-character
 | ||||
|     // input encodings only. It also assumes that the input the input
 | ||||
|     // encoding shares its first 128 values (0-127) with ASCII. These
 | ||||
|     // conditions must be checked again if you add more input encodings
 | ||||
|     // later.
 | ||||
| 
 | ||||
|     // Compute output length, and check for pure ascii input at the same
 | ||||
|     // time.
 | ||||
|     bool ascii; | ||||
|     size_t outlen = getLength2(input, ascii); | ||||
| 
 | ||||
|     // If we're pure ascii, then don't bother converting anything.
 | ||||
|     if(ascii) | ||||
|         return std::string(input, outlen); | ||||
| 
 | ||||
|     // Make sure the output is large enough
 | ||||
|     resize(outlen); | ||||
|     char *out = &mOutput[0]; | ||||
| 
 | ||||
|     // Translate
 | ||||
|     while(*input) | ||||
|         copyFromArray2(input, out); | ||||
| 
 | ||||
|     // Make sure that we wrote the correct number of bytes
 | ||||
|     assert((out-&mOutput[0]) == (int)outlen); | ||||
| 
 | ||||
|     // And make extra sure the output is null terminated
 | ||||
|     assert(mOutput.size() > outlen); | ||||
|     assert(mOutput[outlen] == 0); | ||||
| 
 | ||||
|     // Return a string
 | ||||
|     return std::string(&mOutput[0], outlen); | ||||
| } | ||||
| 
 | ||||
| // Make sure the output vector is large enough for 'size' bytes,
 | ||||
| // including a terminating zero after it.
 | ||||
| void Utf8Encoder::resize(size_t size) | ||||
| { | ||||
|     if (mOutput.size() <= size) | ||||
|         // Add some extra padding to reduce the chance of having to resize
 | ||||
|         // again later.
 | ||||
|         mOutput.resize(3*size); | ||||
| 
 | ||||
|     // And make sure the string is zero terminated
 | ||||
|     mOutput[size] = 0; | ||||
| } | ||||
| 
 | ||||
| /** Get the total length needed to decode the given string with
 | ||||
|     the given translation array. The arrays are encoded with 6 bytes | ||||
|     per character, with the first giving the length and the next 5 the | ||||
|     actual data. | ||||
|   the given translation array. The arrays are encoded with 6 bytes | ||||
|   per character, with the first giving the length and the next 5 the | ||||
|   actual data. | ||||
| 
 | ||||
|     The function serves a dual purpose for optimization reasons: it | ||||
|     checks if the input is pure ascii (all values are <= 127). If this | ||||
|     is the case, then the ascii parameter is set to true, and the | ||||
|     caller can optimize for this case. | ||||
|   The function serves a dual purpose for optimization reasons: it | ||||
|   checks if the input is pure ascii (all values are <= 127). If this | ||||
|   is the case, then the ascii parameter is set to true, and the | ||||
|   caller can optimize for this case. | ||||
|  */ | ||||
| static size_t getLength(const char *arr, const char* input, bool &ascii) | ||||
| size_t Utf8Encoder::getLength(const char* input, bool &ascii) | ||||
| { | ||||
|   ascii = true; | ||||
|   size_t len = 0; | ||||
|   const char* ptr = input; | ||||
|   unsigned char inp = *ptr; | ||||
|     ascii = true; | ||||
|     size_t len = 0; | ||||
|     const char* ptr = input; | ||||
|     unsigned char inp = *ptr; | ||||
| 
 | ||||
|   // Do away with the ascii part of the string first (this is almost
 | ||||
|   // always the entire string.)
 | ||||
|   while(inp && inp < 128) | ||||
|     inp = *(++ptr); | ||||
|   len += (ptr-input); | ||||
|     // Do away with the ascii part of the string first (this is almost
 | ||||
|     // always the entire string.)
 | ||||
|     while (inp && inp < 128) | ||||
|         inp = *(++ptr); | ||||
|     len += (ptr-input); | ||||
| 
 | ||||
|   // If we're not at the null terminator at this point, then there
 | ||||
|   // were some non-ascii characters to deal with. Go to slow-mode for
 | ||||
|   // the rest of the string.
 | ||||
|   if(inp) | ||||
|     // If we're not at the null terminator at this point, then there
 | ||||
|     // were some non-ascii characters to deal with. Go to slow-mode for
 | ||||
|     // the rest of the string.
 | ||||
|     if (inp) | ||||
|     { | ||||
|       ascii = false; | ||||
|       while(inp) | ||||
|         ascii = false; | ||||
|         while (inp) | ||||
|         { | ||||
|           // Find the translated length of this character in the
 | ||||
|           // lookup table.
 | ||||
|           len += arr[inp*6]; | ||||
|           inp = *(++ptr); | ||||
|             // Find the translated length of this character in the
 | ||||
|             // lookup table.
 | ||||
|             len += translationArray[inp*6]; | ||||
|             inp = *(++ptr); | ||||
|         } | ||||
|     } | ||||
|   return len; | ||||
|     return len; | ||||
| } | ||||
| 
 | ||||
| // Translate one character 'ch' using the translation array 'arr', and
 | ||||
| // advance the output pointer accordingly.
 | ||||
| static void copyFromArray(const char *arr, unsigned char ch, char* &out) | ||||
| void Utf8Encoder::copyFromArray(unsigned char ch, char* &out) | ||||
| { | ||||
|   // Optimize for ASCII values
 | ||||
|   if(ch < 128) | ||||
|     // Optimize for ASCII values
 | ||||
|     if (ch < 128) | ||||
|     { | ||||
|       *(out++) = ch; | ||||
|       return; | ||||
|         *(out++) = ch; | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|   const char *in = arr + ch*6; | ||||
|   int len = *(in++); | ||||
|   for(int i=0; i<len; i++) | ||||
|     *(out++) = *(in++); | ||||
|     const char *in = translationArray + ch*6; | ||||
|     int len = *(in++); | ||||
|     for (int i=0; i<len; i++) | ||||
|         *(out++) = *(in++); | ||||
| } | ||||
| 
 | ||||
| std::string ToUTF8::getUtf8(ToUTF8::FromType from) | ||||
| size_t Utf8Encoder::getLength2(const char* input, bool &ascii) | ||||
| { | ||||
|   // Pick translation array
 | ||||
|   const char *arr; | ||||
|   switch (from) | ||||
|   { | ||||
|     case ToUTF8::WINDOWS_1252: | ||||
|     ascii = true; | ||||
|     size_t len = 0; | ||||
|     const char* ptr = input; | ||||
|     unsigned char inp = *ptr; | ||||
| 
 | ||||
|     // Do away with the ascii part of the string first (this is almost
 | ||||
|     // always the entire string.)
 | ||||
|     while (inp && inp < 128) | ||||
|         inp = *(++ptr); | ||||
|     len += (ptr-input); | ||||
| 
 | ||||
|     // If we're not at the null terminator at this point, then there
 | ||||
|     // were some non-ascii characters to deal with. Go to slow-mode for
 | ||||
|     // the rest of the string.
 | ||||
|     if (inp) | ||||
|     { | ||||
|       arr = ToUTF8::windows_1252; | ||||
|       break; | ||||
|     } | ||||
|     case ToUTF8::WINDOWS_1250: | ||||
|     { | ||||
|       arr = ToUTF8::windows_1250; | ||||
|       break; | ||||
|     } | ||||
|     case ToUTF8::WINDOWS_1251: | ||||
|     { | ||||
|       arr = ToUTF8::windows_1251; | ||||
|       break; | ||||
|     } | ||||
|     default: | ||||
|     { | ||||
|       assert(0); | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   // Double check that the input string stops at some point (it might
 | ||||
|   // contain zero terminators before this, inside its own data, which
 | ||||
|   // is also ok.)
 | ||||
|   const char* input = &buf[0]; | ||||
|   assert(input[size] == 0); | ||||
| 
 | ||||
|   // TODO: The rest of this function is designed for single-character
 | ||||
|   // input encodings only. It also assumes that the input
 | ||||
|   // encoding shares its first 128 values (0-127) with ASCII. These
 | ||||
|   // conditions must be checked again if you add more input encodings
 | ||||
|   // later.
 | ||||
| 
 | ||||
|   // Compute output length, and check for pure ascii input at the same
 | ||||
|   // time.
 | ||||
|   bool ascii; | ||||
|   size_t outlen = getLength(arr, input, ascii); | ||||
| 
 | ||||
|   // If we're pure ascii, then don't bother converting anything.
 | ||||
|   if(ascii) | ||||
|     return std::string(input, outlen); | ||||
| 
 | ||||
|   // Make sure the output is large enough
 | ||||
|   resize(output, outlen); | ||||
|   char *out = &output[0]; | ||||
| 
 | ||||
|   // Translate
 | ||||
|   while(*input) | ||||
|     copyFromArray(arr, *(input++), out); | ||||
| 
 | ||||
|   // Make sure that we wrote the correct number of bytes
 | ||||
|   assert((out-&output[0]) == (int)outlen); | ||||
| 
 | ||||
|   // And make extra sure the output is null terminated
 | ||||
|   assert(output.size() > outlen); | ||||
|   assert(output[outlen] == 0); | ||||
| 
 | ||||
|   // Return a string
 | ||||
|   return std::string(&output[0], outlen); | ||||
| } | ||||
| 
 | ||||
| static size_t getLength2(const char *arr, const char* input, bool &ascii) | ||||
| { | ||||
|   ascii = true; | ||||
|   size_t len = 0; | ||||
|   const char* ptr = input; | ||||
|   unsigned char inp = *ptr; | ||||
| 
 | ||||
|   // Do away with the ascii part of the string first (this is almost
 | ||||
|   // always the entire string.)
 | ||||
|   while(inp && inp < 128) | ||||
|     inp = *(++ptr); | ||||
|   len += (ptr-input); | ||||
| 
 | ||||
|   // If we're not at the null terminator at this point, then there
 | ||||
|   // were some non-ascii characters to deal with. Go to slow-mode for
 | ||||
|   // the rest of the string.
 | ||||
|   if(inp) | ||||
|     { | ||||
|       ascii = false; | ||||
|       while(inp) | ||||
|         ascii = false; | ||||
|         while(inp) | ||||
|         { | ||||
|             len += 1; | ||||
|           // Find the translated length of this character in the
 | ||||
|           // lookup table.
 | ||||
|             // Find the translated length of this character in the
 | ||||
|             // lookup table.
 | ||||
|             switch(inp) | ||||
|             { | ||||
|             case 0xe2: len -= 2; break; | ||||
|             case 0xc2: | ||||
|             case 0xcb: | ||||
|             case 0xc4: | ||||
|             case 0xc6: | ||||
|             case 0xc3: | ||||
|             case 0xd0: | ||||
|             case 0xd1: | ||||
|             case 0xd2: | ||||
|             case 0xc5: len -= 1; break; | ||||
|                 case 0xe2: len -= 2; break; | ||||
|                 case 0xc2: | ||||
|                 case 0xcb: | ||||
|                 case 0xc4: | ||||
|                 case 0xc6: | ||||
|                 case 0xc3: | ||||
|                 case 0xd0: | ||||
|                 case 0xd1: | ||||
|                 case 0xd2: | ||||
|                 case 0xc5: len -= 1; break; | ||||
|             } | ||||
| 
 | ||||
|           inp = *(++ptr); | ||||
|             inp = *(++ptr); | ||||
|         } | ||||
|     } | ||||
|   return len; | ||||
|     return len; | ||||
| } | ||||
| 
 | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| 
 | ||||
| static void copyFromArray2(const char *arr, char*& chp, char* &out) | ||||
| void Utf8Encoder::copyFromArray2(const char*& chp, char* &out) | ||||
| { | ||||
|     unsigned char ch = *(chp++); | ||||
|   // Optimize for ASCII values
 | ||||
|   if(ch < 128) | ||||
|     // Optimize for ASCII values
 | ||||
|     if (ch < 128) | ||||
|     { | ||||
|       *(out++) = ch; | ||||
|       return; | ||||
|         *(out++) = ch; | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|   int len = 1; | ||||
|   switch (ch) | ||||
|   { | ||||
|   case 0xe2: len = 3; break; | ||||
|   case 0xc2: | ||||
|   case 0xcb: | ||||
|   case 0xc4: | ||||
|   case 0xc6: | ||||
|   case 0xc3: | ||||
|   case 0xd0: | ||||
|   case 0xd1: | ||||
|   case 0xd2: | ||||
|   case 0xc5: len = 2; break; | ||||
|   } | ||||
| 
 | ||||
|   if (len == 1) // There is no 1 length utf-8 glyph that is not 0x20 (empty space)
 | ||||
|   { | ||||
|       *(out++) = ch; | ||||
|       return; | ||||
|   } | ||||
| 
 | ||||
|   unsigned char ch2 = *(chp++); | ||||
|   unsigned char ch3 = '\0'; | ||||
|   if (len == 3) | ||||
|       ch3 = *(chp++); | ||||
| 
 | ||||
|   for (int i = 128; i < 256; i++) | ||||
|   { | ||||
|       unsigned char b1 = arr[i*6 + 1], b2 = arr[i*6 + 2], b3 = arr[i*6 + 3]; | ||||
|       if (b1 == ch && b2 == ch2 && (len != 3 || b3 == ch3)) | ||||
|       { | ||||
|           *(out++) = (char)i; | ||||
|           return; | ||||
|       } | ||||
|   } | ||||
| 
 | ||||
|   std::cout << "Could not find glyph " << std::hex << (int)ch << " " << (int)ch2 << " " << (int)ch3 << std::endl; | ||||
| 
 | ||||
|   *(out++) = ch; // Could not find glyph, just put whatever
 | ||||
| } | ||||
| 
 | ||||
| std::string ToUTF8::getLegacyEnc(ToUTF8::FromType to) | ||||
| { | ||||
|   // Pick translation array
 | ||||
|   const char *arr; | ||||
|   switch (to) | ||||
|   { | ||||
|     case ToUTF8::WINDOWS_1252: | ||||
|     int len = 1; | ||||
|     switch (ch) | ||||
|     { | ||||
|       arr = ToUTF8::windows_1252; | ||||
|       break; | ||||
|         case 0xe2: len = 3; break; | ||||
|         case 0xc2: | ||||
|         case 0xcb: | ||||
|         case 0xc4: | ||||
|         case 0xc6: | ||||
|         case 0xc3: | ||||
|         case 0xd0: | ||||
|         case 0xd1: | ||||
|         case 0xd2: | ||||
|         case 0xc5: len = 2; break; | ||||
|     } | ||||
|     case ToUTF8::WINDOWS_1250: | ||||
| 
 | ||||
|     if (len == 1) // There is no 1 length utf-8 glyph that is not 0x20 (empty space)
 | ||||
|     { | ||||
|       arr = ToUTF8::windows_1250; | ||||
|       break; | ||||
|         *(out++) = ch; | ||||
|         return; | ||||
|     } | ||||
|     case ToUTF8::WINDOWS_1251: | ||||
| 
 | ||||
|     unsigned char ch2 = *(chp++); | ||||
|     unsigned char ch3 = '\0'; | ||||
|     if (len == 3) | ||||
|         ch3 = *(chp++); | ||||
| 
 | ||||
|     for (int i = 128; i < 256; i++) | ||||
|     { | ||||
|       arr = ToUTF8::windows_1251; | ||||
|       break; | ||||
|         unsigned char b1 = translationArray[i*6 + 1], b2 = translationArray[i*6 + 2], b3 = translationArray[i*6 + 3]; | ||||
|         if (b1 == ch && b2 == ch2 && (len != 3 || b3 == ch3)) | ||||
|         { | ||||
|             *(out++) = (char)i; | ||||
|             return; | ||||
|         } | ||||
|     } | ||||
|     default: | ||||
|     { | ||||
|       assert(0); | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   // Double check that the input string stops at some point (it might
 | ||||
|   // contain zero terminators before this, inside its own data, which
 | ||||
|   // is also ok.)
 | ||||
|   char* input = &buf[0]; | ||||
|   assert(input[size] == 0); | ||||
|     std::cout << "Could not find glyph " << std::hex << (int)ch << " " << (int)ch2 << " " << (int)ch3 << std::endl; | ||||
| 
 | ||||
|   // TODO: The rest of this function is designed for single-character
 | ||||
|   // input encodings only. It also assumes that the input
 | ||||
|   // encoding shares its first 128 values (0-127) with ASCII. These
 | ||||
|   // conditions must be checked again if you add more input encodings
 | ||||
|   // later.
 | ||||
| 
 | ||||
|   // Compute output length, and check for pure ascii input at the same
 | ||||
|   // time.
 | ||||
|   bool ascii; | ||||
|   size_t outlen = getLength2(arr, input, ascii); | ||||
| 
 | ||||
|   // If we're pure ascii, then don't bother converting anything.
 | ||||
|   if(ascii) | ||||
|       return std::string(input, outlen); | ||||
| 
 | ||||
|   // Make sure the output is large enough
 | ||||
|   resize(output, outlen); | ||||
|   char *out = &output[0]; | ||||
| 
 | ||||
|   // Translate
 | ||||
|   while(*input) | ||||
|     copyFromArray2(arr, input, out); | ||||
| 
 | ||||
|   // Make sure that we wrote the correct number of bytes
 | ||||
|   assert((out-&output[0]) == (int)outlen); | ||||
| 
 | ||||
|   // And make extra sure the output is null terminated
 | ||||
|   assert(output.size() > outlen); | ||||
|   assert(output[outlen] == 0); | ||||
| 
 | ||||
|   // Return a string
 | ||||
|   return std::string(&output[0], outlen); | ||||
|     *(out++) = ch; // Could not find glyph, just put whatever
 | ||||
| } | ||||
| 
 | ||||
| ToUTF8::FromType ToUTF8::calculateEncoding(const std::string& encodingName) | ||||
| { | ||||
|   if (encodingName == "win1250") | ||||
|     return ToUTF8::WINDOWS_1250; | ||||
|   else if (encodingName == "win1251") | ||||
|     return ToUTF8::WINDOWS_1251; | ||||
|   else | ||||
|     return ToUTF8::WINDOWS_1252; | ||||
|     if (encodingName == "win1250") | ||||
|         return ToUTF8::WINDOWS_1250; | ||||
|     else if (encodingName == "win1251") | ||||
|         return ToUTF8::WINDOWS_1251; | ||||
|     else | ||||
|         return ToUTF8::WINDOWS_1252; | ||||
| } | ||||
| 
 | ||||
| std::string ToUTF8::encodingUsingMessage(const std::string& encodingName) | ||||
| { | ||||
|   if (encodingName == "win1250") | ||||
|     return "Using Central and Eastern European font encoding."; | ||||
|   else if (encodingName == "win1251") | ||||
|     return "Using Cyrillic font encoding."; | ||||
|   else | ||||
|     return "Using default (English) font encoding."; | ||||
|     if (encodingName == "win1250") | ||||
|         return "Using Central and Eastern European font encoding."; | ||||
|     else if (encodingName == "win1251") | ||||
|         return "Using Cyrillic font encoding."; | ||||
|     else | ||||
|         return "Using default (English) font encoding."; | ||||
| } | ||||
|  |  | |||
|  | @ -2,29 +2,53 @@ | |||
| #define COMPONENTS_TOUTF8_H | ||||
| 
 | ||||
| #include <string> | ||||
| #include <cstring> | ||||
| #include <vector> | ||||
| 
 | ||||
| namespace ToUTF8 | ||||
| { | ||||
|   // These are all the currently supported code pages
 | ||||
|   enum FromType | ||||
|     // These are all the currently supported code pages
 | ||||
|     enum FromType | ||||
|     { | ||||
|       WINDOWS_1250,      // Central and Eastern European languages
 | ||||
|       WINDOWS_1251,      // Cyrillic languages
 | ||||
|       WINDOWS_1252       // Used by English version of Morrowind (and
 | ||||
|                          // probably others)
 | ||||
|         WINDOWS_1250,      // Central and Eastern European languages
 | ||||
|         WINDOWS_1251,      // Cyrillic languages
 | ||||
|         WINDOWS_1252       // Used by English version of Morrowind (and
 | ||||
|             // probably others)
 | ||||
|     }; | ||||
| 
 | ||||
|   // Return a writable buffer of at least 'size' bytes. The buffer
 | ||||
|   // does not have to be freed.
 | ||||
|   char* getBuffer(int size); | ||||
|     FromType calculateEncoding(const std::string& encodingName); | ||||
|     std::string encodingUsingMessage(const std::string& encodingName); | ||||
| 
 | ||||
|   // Convert the previously written buffer to UTF8 from the given code
 | ||||
|   // page.
 | ||||
|   std::string getUtf8(FromType from); | ||||
|   std::string getLegacyEnc(FromType to); | ||||
|     // class
 | ||||
| 
 | ||||
|   FromType calculateEncoding(const std::string& encodingName); | ||||
|   std::string encodingUsingMessage(const std::string& encodingName); | ||||
|     class Utf8Encoder | ||||
|     { | ||||
|         public: | ||||
|             Utf8Encoder(FromType sourceEncoding); | ||||
| 
 | ||||
|             // Convert to UTF8 from the previously given code page.
 | ||||
|             std::string getUtf8(const char *input, int size); | ||||
|             inline std::string getUtf8(const std::string &str) | ||||
|             { | ||||
|                 return getUtf8(str.c_str(), str.size()); | ||||
|             } | ||||
| 
 | ||||
|             std::string getLegacyEnc(const char *input, int size); | ||||
|             inline std::string getLegacyEnc(const std::string &str) | ||||
|             { | ||||
|                 return getLegacyEnc(str.c_str(), str.size()); | ||||
|             } | ||||
| 
 | ||||
|         private: | ||||
|             void resize(size_t size); | ||||
|             size_t getLength(const char* input, bool &ascii); | ||||
|             void copyFromArray(unsigned char chp, char* &out); | ||||
|             size_t getLength2(const char* input, bool &ascii); | ||||
|             void copyFromArray2(const char*& chp, char* &out); | ||||
| 
 | ||||
|             std::vector<char> mOutput; | ||||
|             char* translationArray; | ||||
|     }; | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
|  |  | |||
|  | @ -50,10 +50,7 @@ namespace Translation | |||
| 
 | ||||
|             if (!line.empty()) | ||||
|             { | ||||
|                 char* buffer = ToUTF8::getBuffer(line.size() + 1); | ||||
|                 //buffer has at least line.size() + 1 bytes, so it must be safe
 | ||||
|                 strcpy(buffer, line.c_str()); | ||||
|                 line = ToUTF8::getUtf8(mEncoding); | ||||
|                 line = mEncoder->getUtf8(line); | ||||
| 
 | ||||
|                 size_t tab_pos = line.find('\t'); | ||||
|                 if (tab_pos != std::string::npos && tab_pos > 0 && tab_pos < line.size() - 1) | ||||
|  | @ -104,9 +101,9 @@ namespace Translation | |||
|             return phrase; | ||||
|     } | ||||
| 
 | ||||
|     void Storage::setEncoding (const ToUTF8::FromType& encoding) | ||||
|     void Storage::setEncoder(ToUTF8::Utf8Encoder* encoder) | ||||
|     { | ||||
|         mEncoding = encoding; | ||||
|         mEncoder = encoder; | ||||
|     } | ||||
| 
 | ||||
|     bool Storage::hasTranslation() const | ||||
|  |  | |||
|  | @ -19,7 +19,7 @@ namespace Translation | |||
|         // Standard form usually means nominative case
 | ||||
|         std::string topicStandardForm(const std::string& phrase) const; | ||||
| 
 | ||||
|         void setEncoding (const ToUTF8::FromType& encoding); | ||||
|         void setEncoder(ToUTF8::Utf8Encoder* encoder); | ||||
| 
 | ||||
|         bool hasTranslation() const; | ||||
| 
 | ||||
|  | @ -34,7 +34,7 @@ namespace Translation | |||
|         void loadDataFromStream(ContainerType& container, std::istream& stream); | ||||
| 
 | ||||
| 
 | ||||
|         ToUTF8::FromType mEncoding; | ||||
|         ToUTF8::Utf8Encoder* mEncoder; | ||||
|         ContainerType mCellNamesTranslations, mTopicIDs, mPhraseForms; | ||||
|     }; | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue