Provide multibyte toLower() and single chars comparator

new-script-api
Andrei Kortunov 7 years ago
parent 4dcaf040e6
commit a391990f2a

@ -311,29 +311,39 @@ struct JournalViewModelImpl : JournalViewModel
for (MWBase::Journal::TTopicIter i = journal->topicBegin (); i != journal->topicEnd (); ++i)
{
if (i->first.length() < 2)
continue;
unsigned char byte1 = i->first[0];
unsigned char byte2 = i->first[1];
// First, check for two-byte UTF-8 symbols, e.g. Cyrillic ones
// TODO: check which language journal index is using
if ((byte1 == 0xd0 || byte1 == 0xd1) && i->first.length() >= 2)
{
unsigned char byte2 = i->first[1];
// Upper case
if (byte1 == 0xd0 && byte2 >= 0xb0 && byte2 < 0xc0)
byte2 -= 32;
std::pair<unsigned char, unsigned char> symbol = Misc::StringUtils::toLower(byte1, byte2);
if (byte1 == 0xd1 && byte2 >= 0x80 && byte2 < 0x90)
{
byte1 -= 1;
byte2 += 32;
}
// CYRILLIC LETTER A - CYRILLIC LETTER PE
// index from 1 to 16
if (symbol.first == 0xd0 && symbol.second >= (0xaf + index) && symbol.second < (0xbf + index) && symbol.second == (0xaf + index))
{
visitor (i->second.getName());
continue;
}
// CYRILLIC CAPITAL A is a 0xd090 in UTF-8
// so we can use 0xd08f + index
// (index is a position of letter in alphabet, begins from 1)
if (byte1 != 0xd0 || byte2 != 0x8f + index)
continue;
// CYRILLIC LETTER R - CYRILLIC LETTER YA
// index from 17 to 32
if (symbol.first == 0xd1 && symbol.second >= (0x6f + index) && symbol.second < (0x7f + index) && symbol.second == (0x6f + index))
{
visitor (i->second.getName());
continue;
}
}
else
{
// Otherwise check for regular Latin symbols, 0x61 = 'a'
if (i->first [0] != 0x60 + index)
continue;
visitor (i->second.getName());
visitor (i->second.getName());
}
}
}

@ -55,6 +55,36 @@ public:
};
}
/// Lower-cases a two-byte symbol given as its first and second byte.
///
/// Cyrillic capital letters encoded in UTF-8 are mapped explicitly:
///   - IO     (U+0401, 0xD0 0x81)             -> 0xD1 0x91
///   - A..PE  (U+0410..U+041F, 0xD0 0x90..9F) -> 0xD0 0xB0..BF
///   - R..YA  (U+0420..U+042F, 0xD0 0xA0..AF) -> 0xD1 0x80..8F
/// Every other pair is passed, byte by byte, through the single-argument
/// toLower() overload (declared elsewhere in this file).
///
/// \param byte1 first (lead) byte of the symbol
/// \param byte2 second byte of the symbol
/// \return the lower-cased (first, second) byte pair
static std::pair<char, char> toLower(unsigned char byte1, unsigned char byte2)
{
    std::pair<unsigned char, unsigned char> symbol = std::make_pair(byte1, byte2);

    // CYRILLIC CAPITAL IO: U+0401 is 0xD0 0x81 in UTF-8 (a continuation byte
    // is always in 0x80..0xBF, so it can never be 0x01).
    // Its lower-case form U+0451 is 0xD1 0x91.
    if (symbol.first == 0xd0 && symbol.second == 0x81)
    {
        symbol.first++;
        symbol.second = 0x91;
    }
    // CYRILLIC CAPITAL A - CYRILLIC CAPITAL PE:
    // the lower-case letters share the 0xD0 lead byte, 0x20 further on.
    else if (symbol.first == 0xd0 && symbol.second >= 0x90 && symbol.second < 0xa0)
    {
        symbol.second += 0x20;
    }
    // CYRILLIC CAPITAL R - CYRILLIC CAPITAL YA:
    // the lower-case letters live under the next lead byte (0xD1),
    // with the second byte wrapping back to 0x80..0x8F.
    else if (symbol.first == 0xd0 && symbol.second >= 0xa0 && symbol.second < 0xb0)
    {
        symbol.first++;
        symbol.second -= 0x20;
    }
    // Other symbols: lower-case each byte independently.
    else
    {
        symbol.first = toLower(symbol.first);
        symbol.second = toLower(symbol.second);
    }

    return symbol;
}
/// Strict-weak "less than" between two strings: a lexicographical comparison
/// in which individual characters are ordered by the `ci` comparator
/// (declared elsewhere in this file; presumably case-insensitive, as the
/// function name suggests).
///
/// \param x left-hand string
/// \param y right-hand string
/// \return true when \a x orders before \a y under `ci`
static bool ciLess(const std::string &x, const std::string &y)
{
    const std::string::const_iterator lhsFirst = x.begin();
    const std::string::const_iterator lhsLast = x.end();
    const std::string::const_iterator rhsFirst = y.begin();
    const std::string::const_iterator rhsLast = y.end();

    return std::lexicographical_compare(lhsFirst, lhsLast, rhsFirst, rhsLast, ci());
}

Loading…
Cancel
Save