Fix keyword search when the keyword is preceded by a non-whitespace, non-alphabetic character

pull/3152/head
florent teppe 3 years ago committed by psi29a
parent cd4d76f8c5
commit e5abadc234

@ -3,7 +3,6 @@
#include <cctype>
#include <map>
#include <limits>
#include <stdexcept>
#include <vector>
#include <algorithm> // std::reverse
@ -69,18 +68,6 @@ public:
return false;
}
static bool isWhitespaceUTF8(const int utf8Char)
{
if (utf8Char >= 0 && utf8Char <= static_cast<int>( std::numeric_limits<unsigned char>::max()))
{
//That function has undefined behavior if the character doesn't fit in unsigned char
return std::isspace(utf8Char);
}
else
{
return false;
}
}
static bool sortMatches(const Match& left, const Match& right)
{
@ -92,16 +79,6 @@ public:
std::vector<Match> matches;
for (Point i = beg; i != end; ++i)
{
// check if previous character marked start of new word
if (i != beg)
{
Point prev = i;
--prev;
if(!isWhitespaceUTF8(*prev))
continue;
}
// check first character
typename Entry::childen_t::iterator candidate = mRoot.mChildren.find (Misc::StringUtils::toLower (*i));

@ -74,7 +74,7 @@ TEST_F(KeywordSearchTest, keyword_test_utf8_word_begin)
search.seed("états", 0);
search.seed("ïrradiés", 0);
search.seed("ça nous déçois", 0);
search.seed("ois", 0);
std::string text = "les nations unis ont réunis le monde entier, états units inclus pour parler du problème des gens ïrradiés et ça nous déçois";
@ -86,3 +86,51 @@ TEST_F(KeywordSearchTest, keyword_test_utf8_word_begin)
EXPECT_EQ (std::string( matches[1].mBeg, matches[1].mEnd) , "ïrradiés");
EXPECT_EQ (std::string( matches[2].mBeg, matches[2].mEnd) , "ça nous déçois");
}
TEST_F(KeywordSearchTest, keyword_test_non_alpha_non_whitespace_word_begin)
{
// We make sure that the search works well even if the separator is not a whitespace
MWDialogue::KeywordSearch<std::string, int> search;
search.seed("Report to caius cosades", 0);
std::string text = "I was told to \"Report to caius cosades\"";
std::vector<MWDialogue::KeywordSearch<std::string, int>::Match> matches;
search.highlightKeywords(text.begin(), text.end(), matches);
EXPECT_EQ(matches.size(), 1);
EXPECT_EQ(std::string(matches[0].mBeg, matches[0].mEnd), "Report to caius cosades");
}
TEST_F(KeywordSearchTest, keyword_test_russian_non_ascii_before)
{
    // Russian keyword wrapped in guillemets: the character right before the
    // keyword is a non-ASCII, non-whitespace separator.
    MWDialogue::KeywordSearch<std::string, int> search;
    search.seed("Доложить Каю Косадесу", 0);
    std::string text = "Что? Да. Я Кай Косадес. То есть как это, вам велели «Доложить Каю Косадесу»? О чем вы говорите?";
    std::vector<MWDialogue::KeywordSearch<std::string, int>::Match> matches;
    search.highlightKeywords(text.begin(), text.end(), matches);
    // Fatal assertion: reading matches[0] below would be out of bounds if nothing
    // was matched, so stop the test here instead of continuing.
    ASSERT_EQ(matches.size(), 1u);
    EXPECT_EQ(std::string(matches[0].mBeg, matches[0].mEnd), "Доложить Каю Косадесу");
}
TEST_F(KeywordSearchTest, keyword_test_russian_ascii_before)
{
    // Russian keyword wrapped in apostrophes: the character right before the
    // keyword is an ASCII, non-whitespace separator.
    MWDialogue::KeywordSearch<std::string, int> search;
    search.seed("Доложить Каю Косадесу", 0);
    std::string text = "Что? Да. Я Кай Косадес. То есть как это, вам велели 'Доложить Каю Косадесу'? О чем вы говорите?";
    std::vector<MWDialogue::KeywordSearch<std::string, int>::Match> matches;
    search.highlightKeywords(text.begin(), text.end(), matches);
    // Fatal assertion: reading matches[0] below would be out of bounds if nothing
    // was matched, so stop the test here instead of continuing.
    ASSERT_EQ(matches.size(), 1u);
    EXPECT_EQ(std::string(matches[0].mBeg, matches[0].mEnd), "Доложить Каю Косадесу");
}

Loading…
Cancel
Save