From a391990f2a52695b3c56afedaea5ffb80581f1e4 Mon Sep 17 00:00:00 2001 From: Andrei Kortunov Date: Mon, 20 Nov 2017 21:30:46 +0400 Subject: [PATCH] Provide multibyte toLower() and single chars comparator --- apps/openmw/mwgui/journalviewmodel.cpp | 46 ++++++++++++++++---------- components/misc/stringops.hpp | 30 +++++++++++++++++ 2 files changed, 58 insertions(+), 18 deletions(-) diff --git a/apps/openmw/mwgui/journalviewmodel.cpp b/apps/openmw/mwgui/journalviewmodel.cpp index 02103280b..33935f040 100644 --- a/apps/openmw/mwgui/journalviewmodel.cpp +++ b/apps/openmw/mwgui/journalviewmodel.cpp @@ -311,29 +311,39 @@ struct JournalViewModelImpl : JournalViewModel for (MWBase::Journal::TTopicIter i = journal->topicBegin (); i != journal->topicEnd (); ++i) { - if (i->first.length() < 2) - continue; - unsigned char byte1 = i->first[0]; - unsigned char byte2 = i->first[1]; + // First, check for two-byte UTF-8 symbols, e.g. Cyrillic ones + // TODO: check which language journal index is using + if ((byte1 == 0xd0 || byte1 == 0xd1) && i->first.length() >= 2) + { + unsigned char byte2 = i->first[1]; - // Upper case - if (byte1 == 0xd0 && byte2 >= 0xb0 && byte2 < 0xc0) - byte2 -= 32; + std::pair symbol = Misc::StringUtils::toLower(byte1, byte2); - if (byte1 == 0xd1 && byte2 >= 0x80 && byte2 < 0x90) - { - byte1 -= 1; - byte2 += 32; - } + // CYRILLIC LETTER A - CYRILLIC LETTER PE + // index from 1 to 16 + if (symbol.first == 0xd0 && symbol.second >= (0xaf + index) && symbol.second < (0xbf + index) && symbol.second == (0xaf + index)) + { + visitor (i->second.getName()); + continue; + } - // CYRILLIC CAPITAL A is a 0xd090 in UTF-8 - // so we can use 0xd08f + index - // (index is a position of letter in alphabet, begins from 1) - if (byte1 != 0xd0 || byte2 != 0x8f + index) - continue; + // CYRILLIC LETTERL R - CYRILLIC LETTER YA + // index from 17 to 32 + if (symbol.first == 0xd1 && symbol.second >= (0x6f + index) && symbol.second < (0x7f + index) && symbol.second == (0x6f + 
index)) + { + visitor (i->second.getName()); + continue; + } + } + else + { + // Otherwise check for regular Latin symbols, 0x61 = 'a' + if (i->first [0] != 0x60 + index) + continue; - visitor (i->second.getName()); + visitor (i->second.getName()); + } } } diff --git a/components/misc/stringops.hpp b/components/misc/stringops.hpp index 9acd81710..97865a44c 100644 --- a/components/misc/stringops.hpp +++ b/components/misc/stringops.hpp @@ -55,6 +55,36 @@ public: }; } + static std::pair<unsigned char, unsigned char> toLower(unsigned char byte1, unsigned char byte2) + { + std::pair<unsigned char, unsigned char> symbol = std::make_pair(byte1, byte2); + // CYRILLIC CAPITAL IO (U+0401, UTF-8 0xd0 0x81) -> small io (U+0451, UTF-8 0xd1 0x91) + if (symbol.first == 0xd0 && symbol.second == 0x81) + { + symbol.first++; + symbol.second = 0x91; + } + // CYRILLIC CAPITAL A - CYRILLIC CAPITAL PE (0xd0 0x90..0x9f) -> small letters at 0xd0 0xb0..0xbf + else if (symbol.first == 0xd0 && symbol.second >= 0x90 && symbol.second < 0xa0) + { + symbol.second += 0x20; + } + // CYRILLIC CAPITAL R - CYRILLIC CAPITAL YA (0xd0 0xa0..0xaf) -> small letters at 0xd1 0x80..0x8f + else if (symbol.first == 0xd0 && symbol.second >= 0xa0 && symbol.second < 0xb0) + { + symbol.first++; + symbol.second -= 0x20; + } + // Other symbols: lower-case each byte independently with the one-argument toLower() + else + { + symbol.first = toLower(symbol.first); + symbol.second = toLower(symbol.second); + } + + return symbol; + } + static bool ciLess(const std::string &x, const std::string &y) { return std::lexicographical_compare(x.begin(), x.end(), y.begin(), y.end(), ci()); }