1
0
Fork 0
mirror of https://github.com/OpenMW/openmw.git synced 2025-10-24 15:26:37 +00:00

Fix keyword search when the keyword is preceded by a non-whitespace, non-alphabetic character

This commit is contained in:
florent teppe 2021-10-07 13:26:40 +00:00 committed by psi29a
parent cd4d76f8c5
commit e5abadc234
2 changed files with 49 additions and 24 deletions

View file

@ -3,7 +3,6 @@
#include <cctype> #include <cctype>
#include <map> #include <map>
#include <limits>
#include <stdexcept> #include <stdexcept>
#include <vector> #include <vector>
#include <algorithm> // std::reverse #include <algorithm> // std::reverse
@ -69,18 +68,6 @@ public:
return false; return false;
} }
/// Tells whether the given character value is classified as whitespace by std::isspace.
/// @param utf8Char character value to classify
/// @return the std::isspace classification for values in [0, max unsigned char];
///         false for every value outside that range
static bool isWhitespaceUTF8(const int utf8Char)
{
    const int largestByte = static_cast<int>(std::numeric_limits<unsigned char>::max());

    // std::isspace has undefined behavior if the value doesn't fit in unsigned char,
    // so anything outside that range is rejected before the call.
    const bool fitsInByte = utf8Char >= 0 && utf8Char <= largestByte;
    if (!fitsInByte)
        return false;

    return std::isspace(utf8Char);
}
static bool sortMatches(const Match& left, const Match& right) static bool sortMatches(const Match& left, const Match& right)
{ {
@ -92,16 +79,6 @@ public:
std::vector<Match> matches; std::vector<Match> matches;
for (Point i = beg; i != end; ++i) for (Point i = beg; i != end; ++i)
{ {
// check if previous character marked start of new word
if (i != beg)
{
Point prev = i;
--prev;
if(!isWhitespaceUTF8(*prev))
continue;
}
// check first character // check first character
typename Entry::childen_t::iterator candidate = mRoot.mChildren.find (Misc::StringUtils::toLower (*i)); typename Entry::childen_t::iterator candidate = mRoot.mChildren.find (Misc::StringUtils::toLower (*i));

View file

@ -74,7 +74,7 @@ TEST_F(KeywordSearchTest, keyword_test_utf8_word_begin)
search.seed("états", 0); search.seed("états", 0);
search.seed("ïrradiés", 0); search.seed("ïrradiés", 0);
search.seed("ça nous déçois", 0); search.seed("ça nous déçois", 0);
search.seed("ois", 0);
std::string text = "les nations unis ont réunis le monde entier, états units inclus pour parler du problème des gens ïrradiés et ça nous déçois"; std::string text = "les nations unis ont réunis le monde entier, états units inclus pour parler du problème des gens ïrradiés et ça nous déçois";
@ -86,3 +86,51 @@ TEST_F(KeywordSearchTest, keyword_test_utf8_word_begin)
EXPECT_EQ (std::string( matches[1].mBeg, matches[1].mEnd) , "ïrradiés"); EXPECT_EQ (std::string( matches[1].mBeg, matches[1].mEnd) , "ïrradiés");
EXPECT_EQ (std::string( matches[2].mBeg, matches[2].mEnd) , "ça nous déçois"); EXPECT_EQ (std::string( matches[2].mBeg, matches[2].mEnd) , "ça nous déçois");
} }
TEST_F(KeywordSearchTest, keyword_test_non_alpha_non_whitespace_word_begin)
{
    // A seeded keyword must still be highlighted when the character right before it
    // is neither whitespace nor a letter (here: a double quote).
    MWDialogue::KeywordSearch<std::string, int> search;
    search.seed("Report to caius cosades", 0);

    std::string text = "I was told to \"Report to caius cosades\"";

    std::vector<MWDialogue::KeywordSearch<std::string, int>::Match> matches;
    search.highlightKeywords(text.begin(), text.end(), matches);

    // Fatal assertion: matches[0] is read below, which would be an out-of-range
    // access if the search produced no match.
    ASSERT_EQ(matches.size(), 1u);
    EXPECT_EQ(std::string(matches[0].mBeg, matches[0].mEnd), "Report to caius cosades");
}
TEST_F(KeywordSearchTest, keyword_test_russian_non_ascii_before)
{
    // A Cyrillic keyword must still be highlighted when the character right before it
    // is a non-ASCII, non-whitespace separator (here: the opening guillemet «).
    MWDialogue::KeywordSearch<std::string, int> search;
    search.seed("Доложить Каю Косадесу", 0);

    std::string text = "Что? Да. Я Кай Косадес. То есть как это, вам велели «Доложить Каю Косадесу»? О чем вы говорите?";

    std::vector<MWDialogue::KeywordSearch<std::string, int>::Match> matches;
    search.highlightKeywords(text.begin(), text.end(), matches);

    // Fatal assertion: matches[0] is read below, which would be an out-of-range
    // access if the search produced no match.
    ASSERT_EQ(matches.size(), 1u);
    EXPECT_EQ(std::string(matches[0].mBeg, matches[0].mEnd), "Доложить Каю Косадесу");
}
TEST_F(KeywordSearchTest, keyword_test_russian_ascii_before)
{
    // A Cyrillic keyword must still be highlighted when the character right before it
    // is an ASCII, non-whitespace separator (here: an apostrophe).
    MWDialogue::KeywordSearch<std::string, int> search;
    search.seed("Доложить Каю Косадесу", 0);

    std::string text = "Что? Да. Я Кай Косадес. То есть как это, вам велели 'Доложить Каю Косадесу'? О чем вы говорите?";

    std::vector<MWDialogue::KeywordSearch<std::string, int>::Match> matches;
    search.highlightKeywords(text.begin(), text.end(), matches);

    // Fatal assertion: matches[0] is read below, which would be an out-of-range
    // access if the search produced no match.
    ASSERT_EQ(matches.size(), 1u);
    EXPECT_EQ(std::string(matches[0].mBeg, matches[0].mEnd), "Доложить Каю Косадесу");
}