2014-10-18 19:51:07 +02:00
|
|
|
#ifndef GAME_MWDIALOGUE_KEYWORDSEARCH_H
|
|
|
|
#define GAME_MWDIALOGUE_KEYWORDSEARCH_H
|
2013-05-04 14:15:47 +02:00
|
|
|
|
2013-05-04 17:40:00 +02:00
|
|
|
#include <algorithm> // std::reverse
|
2015-11-30 00:38:52 +01:00
|
|
|
#include <cctype>
|
2021-09-20 21:01:28 +02:00
|
|
|
#include <map>
|
2013-05-04 14:15:47 +02:00
|
|
|
#include <stdexcept>
|
2013-05-04 17:40:00 +02:00
|
|
|
#include <vector>
|
2013-05-04 14:15:47 +02:00
|
|
|
|
2022-08-03 00:00:54 +02:00
|
|
|
#include <components/misc/strings/algorithm.hpp>
|
|
|
|
#include <components/misc/strings/lower.hpp>
|
2013-05-06 15:14:39 +02:00
|
|
|
|
2014-10-18 19:51:07 +02:00
|
|
|
namespace MWDialogue
|
2013-05-06 15:14:39 +02:00
|
|
|
{
|
|
|
|
|
2013-05-04 14:15:47 +02:00
|
|
|
template <typename string_t, typename value_t>
|
|
|
|
class KeywordSearch
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
typedef typename string_t::const_iterator Point;
|
|
|
|
|
|
|
|
struct Match
|
2022-09-22 21:26:05 +03:00
|
|
|
{
|
2013-05-04 14:15:47 +02:00
|
|
|
Point mBeg;
|
2022-09-22 21:26:05 +03:00
|
|
|
Point mEnd;
|
2013-05-04 14:15:47 +02:00
|
|
|
value_t mValue;
|
|
|
|
};
|
|
|
|
|
2021-10-10 16:15:40 +00:00
|
|
|
void seed(string_t keyword, value_t value)
|
2022-09-22 21:26:05 +03:00
|
|
|
{
|
2021-10-10 16:15:40 +00:00
|
|
|
if (keyword.empty())
|
2013-05-06 15:14:39 +02:00
|
|
|
return;
|
|
|
|
seed_impl(std::move(keyword), std::move(value), 0, mRoot);
|
2022-09-22 21:26:05 +03:00
|
|
|
}
|
2013-05-06 15:14:39 +02:00
|
|
|
|
2015-12-07 21:58:30 +01:00
|
|
|
void clear()
|
2013-05-06 15:14:39 +02:00
|
|
|
{
|
|
|
|
mRoot.mChildren.clear();
|
|
|
|
mRoot.mKeyword.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
bool containsKeyword(const string_t& keyword, value_t& value)
|
|
|
|
{
|
|
|
|
typename Entry::childen_t::iterator current;
|
2015-12-07 21:58:30 +01:00
|
|
|
typename Entry::childen_t::iterator next;
|
2022-09-22 21:26:05 +03:00
|
|
|
|
2015-12-07 21:58:30 +01:00
|
|
|
current = mRoot.mChildren.find(Misc::StringUtils::toLower(*keyword.begin()));
|
2013-05-06 15:14:39 +02:00
|
|
|
if (current == mRoot.mChildren.end())
|
|
|
|
return false;
|
|
|
|
else if (current->second.mKeyword.size() && Misc::StringUtils::ciEqual(current->second.mKeyword, keyword))
|
|
|
|
{
|
|
|
|
value = current->second.mValue;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (Point i = ++keyword.begin(); i != keyword.end(); ++i)
|
2022-09-22 21:26:05 +03:00
|
|
|
{
|
2021-12-24 00:54:00 +03:00
|
|
|
next = current->second.mChildren.find(Misc::StringUtils::toLower(*i));
|
2013-05-04 14:15:47 +02:00
|
|
|
if (next == current->second.mChildren.end())
|
2013-05-06 15:14:39 +02:00
|
|
|
return false;
|
|
|
|
if (Misc::StringUtils::ciEqual(next->second.mKeyword, keyword))
|
2022-09-22 21:26:05 +03:00
|
|
|
{
|
2021-10-10 16:15:40 +00:00
|
|
|
value = next->second.mValue;
|
2013-05-06 15:14:39 +02:00
|
|
|
return true;
|
2022-09-22 21:26:05 +03:00
|
|
|
}
|
2013-05-06 15:14:39 +02:00
|
|
|
current = next;
|
2022-09-22 21:26:05 +03:00
|
|
|
}
|
2013-05-06 15:14:39 +02:00
|
|
|
return false;
|
2022-09-22 21:26:05 +03:00
|
|
|
}
|
2021-09-18 18:25:04 +02:00
|
|
|
|
2015-01-09 20:08:52 +01:00
|
|
|
static bool sortMatches(const Match& left, const Match& right) { return left.mBeg < right.mBeg; }
|
|
|
|
|
2021-12-24 00:54:00 +03:00
|
|
|
void highlightKeywords(Point beg, Point end, std::vector<Match>& out) const
|
2013-05-04 14:15:47 +02:00
|
|
|
{
|
2015-01-09 20:08:52 +01:00
|
|
|
std::vector<Match> matches;
|
2013-05-04 14:15:47 +02:00
|
|
|
for (Point i = beg; i != end; ++i)
|
2022-09-22 21:26:05 +03:00
|
|
|
{
|
2013-05-04 14:15:47 +02:00
|
|
|
// check first character
|
2021-12-24 00:54:00 +03:00
|
|
|
typename Entry::childen_t::const_iterator candidate
|
|
|
|
= mRoot.mChildren.find(Misc::StringUtils::toLower(*i));
|
2013-05-04 14:15:47 +02:00
|
|
|
|
|
|
|
// no match, on to next character
|
|
|
|
if (candidate == mRoot.mChildren.end())
|
|
|
|
continue;
|
|
|
|
|
2013-05-04 17:40:00 +02:00
|
|
|
// see how far the match goes
|
|
|
|
Point j = i;
|
|
|
|
|
2021-12-24 00:54:00 +03:00
|
|
|
// some keywords might be longer variations of other keywords, so we definitely need a list of
|
|
|
|
// candidates the first element in the pair is length of the match, i.e. depth from the first character
|
2022-09-22 21:26:05 +03:00
|
|
|
// on
|
2021-12-24 00:54:00 +03:00
|
|
|
std::vector<typename std::pair<int, typename Entry::childen_t::const_iterator>> candidates;
|
2013-05-04 14:15:47 +02:00
|
|
|
|
|
|
|
while ((j + 1) != end)
|
2013-05-04 17:40:00 +02:00
|
|
|
{
|
|
|
|
typename Entry::childen_t::const_iterator next
|
|
|
|
= candidate->second.mChildren.find(Misc::StringUtils::toLower(*++j));
|
2013-05-04 14:15:47 +02:00
|
|
|
|
|
|
|
if (next == candidate->second.mChildren.end())
|
2022-09-22 21:26:05 +03:00
|
|
|
{
|
2013-05-04 17:40:00 +02:00
|
|
|
if (candidate->second.mKeyword.size() > 0)
|
|
|
|
candidates.push_back(std::make_pair((j - i), candidate));
|
2022-09-22 21:26:05 +03:00
|
|
|
break;
|
2013-05-04 14:15:47 +02:00
|
|
|
}
|
|
|
|
|
2013-05-04 17:40:00 +02:00
|
|
|
candidate = next;
|
2013-05-04 14:15:47 +02:00
|
|
|
|
2013-05-04 17:40:00 +02:00
|
|
|
if (candidate->second.mKeyword.size() > 0)
|
|
|
|
candidates.push_back(std::make_pair((j - i), candidate));
|
2022-09-22 21:26:05 +03:00
|
|
|
}
|
2013-05-04 14:15:47 +02:00
|
|
|
|
2021-12-24 00:54:00 +03:00
|
|
|
if (candidates.empty())
|
|
|
|
continue; // didn't match enough to disambiguate, on to next character
|
2013-05-04 14:15:47 +02:00
|
|
|
|
2013-05-04 17:40:00 +02:00
|
|
|
// shorter candidates will be added to the vector first. however, we want to check against longer
|
|
|
|
// candidates first
|
|
|
|
std::reverse(candidates.begin(), candidates.end());
|
2013-05-04 14:15:47 +02:00
|
|
|
|
2013-05-04 17:40:00 +02:00
|
|
|
for (typename std::vector<std::pair<int, typename Entry::childen_t::const_iterator>>::iterator it
|
|
|
|
= candidates.begin();
|
|
|
|
it != candidates.end(); ++it)
|
|
|
|
{
|
|
|
|
candidate = it->second;
|
|
|
|
// try to match the rest of the keyword
|
|
|
|
Point k = i + it->first;
|
2015-12-07 21:58:30 +01:00
|
|
|
typename string_t::const_iterator t = candidate->second.mKeyword.begin() + (k - i);
|
2022-09-22 21:26:05 +03:00
|
|
|
|
2015-12-07 21:58:30 +01:00
|
|
|
while (k != end && t != candidate->second.mKeyword.end())
|
2022-09-22 21:26:05 +03:00
|
|
|
{
|
2015-12-07 21:58:30 +01:00
|
|
|
if (Misc::StringUtils::toLower(*k) != Misc::StringUtils::toLower(*t))
|
2013-05-04 17:40:00 +02:00
|
|
|
break;
|
2022-09-22 21:26:05 +03:00
|
|
|
|
2013-05-04 17:40:00 +02:00
|
|
|
++k, ++t;
|
2022-09-22 21:26:05 +03:00
|
|
|
}
|
|
|
|
|
2013-05-04 17:40:00 +02:00
|
|
|
// didn't match full keyword, try the next candidate
|
|
|
|
if (t != candidate->second.mKeyword.end())
|
|
|
|
continue;
|
2022-09-22 21:26:05 +03:00
|
|
|
|
2015-01-09 04:19:38 +01:00
|
|
|
// found a keyword, but there might still be longer keywords that start somewhere _within_ this
|
|
|
|
// keyword we will resolve these overlapping keywords later, choosing the longest one in case of
|
|
|
|
// conflict
|
|
|
|
Match match;
|
2013-05-04 17:40:00 +02:00
|
|
|
match.mValue = candidate->second.mValue;
|
|
|
|
match.mBeg = i;
|
|
|
|
match.mEnd = k;
|
2015-01-09 20:08:52 +01:00
|
|
|
matches.push_back(match);
|
2022-09-22 21:26:05 +03:00
|
|
|
break;
|
2013-05-04 17:40:00 +02:00
|
|
|
}
|
|
|
|
}
|
2013-05-04 14:15:47 +02:00
|
|
|
|
2015-01-09 04:19:38 +01:00
|
|
|
// resolve overlapping keywords
|
2016-02-22 19:06:12 +01:00
|
|
|
while (!matches.empty())
|
2015-01-09 04:19:38 +01:00
|
|
|
{
|
2015-01-09 20:08:52 +01:00
|
|
|
int longestKeywordSize = 0;
|
2015-01-27 19:00:26 +01:00
|
|
|
typename std::vector<Match>::iterator longestKeyword = matches.begin();
|
2015-01-09 20:08:52 +01:00
|
|
|
for (typename std::vector<Match>::iterator it = matches.begin(); it != matches.end(); ++it)
|
2015-01-09 04:19:38 +01:00
|
|
|
{
|
|
|
|
int size = it->mEnd - it->mBeg;
|
2015-01-09 20:08:52 +01:00
|
|
|
if (size > longestKeywordSize)
|
2022-09-22 21:26:05 +03:00
|
|
|
{
|
2015-01-09 20:08:52 +01:00
|
|
|
longestKeywordSize = size;
|
|
|
|
longestKeyword = it;
|
2015-01-09 04:19:38 +01:00
|
|
|
}
|
2015-01-09 20:08:52 +01:00
|
|
|
|
|
|
|
typename std::vector<Match>::iterator next = it;
|
|
|
|
++next;
|
|
|
|
|
|
|
|
if (next == matches.end())
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (it->mEnd <= next->mBeg)
|
2022-09-22 21:26:05 +03:00
|
|
|
{
|
2015-01-09 20:08:52 +01:00
|
|
|
break; // no overlap
|
2022-09-22 21:26:05 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-09 20:08:52 +01:00
|
|
|
Match keyword = *longestKeyword;
|
|
|
|
matches.erase(longestKeyword);
|
|
|
|
out.push_back(keyword);
|
|
|
|
// erase anything that overlaps with the keyword we just added to the output
|
|
|
|
for (typename std::vector<Match>::iterator it = matches.begin(); it != matches.end();)
|
2015-01-09 04:19:38 +01:00
|
|
|
{
|
2015-01-09 20:08:52 +01:00
|
|
|
if (it->mBeg < keyword.mEnd && it->mEnd > keyword.mBeg)
|
|
|
|
it = matches.erase(it);
|
2022-09-22 21:26:05 +03:00
|
|
|
else
|
|
|
|
++it;
|
2015-01-09 04:19:38 +01:00
|
|
|
}
|
|
|
|
}
|
2015-01-09 20:08:52 +01:00
|
|
|
|
|
|
|
std::sort(out.begin(), out.end(), sortMatches);
|
2015-01-09 04:19:38 +01:00
|
|
|
}
|
2015-01-09 20:08:52 +01:00
|
|
|
|
2013-05-04 14:15:47 +02:00
|
|
|
private:
|
|
|
|
struct Entry
|
|
|
|
{
|
|
|
|
typedef std::map<wchar_t, Entry> childen_t;
|
|
|
|
|
|
|
|
string_t mKeyword;
|
|
|
|
value_t mValue;
|
|
|
|
childen_t mChildren;
|
|
|
|
};
|
|
|
|
|
|
|
|
void seed_impl(string_t keyword, value_t value, size_t depth, Entry& entry)
|
|
|
|
{
|
2015-12-07 21:58:30 +01:00
|
|
|
int ch = Misc::StringUtils::toLower(keyword.at(depth));
|
2013-05-04 14:15:47 +02:00
|
|
|
|
|
|
|
typename Entry::childen_t::iterator j = entry.mChildren.find(ch);
|
|
|
|
|
|
|
|
if (j == entry.mChildren.end())
|
|
|
|
{
|
2021-10-10 16:15:40 +00:00
|
|
|
entry.mChildren[ch].mValue = std::move(value);
|
|
|
|
entry.mChildren[ch].mKeyword = std::move(keyword);
|
2013-05-04 14:15:47 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (j->second.mKeyword.size() > 0)
|
2022-09-22 21:26:05 +03:00
|
|
|
{
|
2013-05-04 14:15:47 +02:00
|
|
|
if (keyword == j->second.mKeyword)
|
|
|
|
throw std::runtime_error("duplicate keyword inserted");
|
|
|
|
|
2021-10-10 16:15:40 +00:00
|
|
|
value_t pushValue = j->second.mValue;
|
|
|
|
string_t pushKeyword = j->second.mKeyword;
|
2013-05-04 14:15:47 +02:00
|
|
|
|
|
|
|
if (depth >= pushKeyword.size())
|
|
|
|
throw std::runtime_error("unexpected");
|
|
|
|
|
|
|
|
if (depth + 1 < pushKeyword.size())
|
2013-05-04 17:40:00 +02:00
|
|
|
{
|
2021-10-10 16:15:40 +00:00
|
|
|
seed_impl(std::move(pushKeyword), std::move(pushValue), depth + 1, j->second);
|
2013-05-04 17:40:00 +02:00
|
|
|
j->second.mKeyword.clear();
|
2022-09-22 21:26:05 +03:00
|
|
|
}
|
2013-05-04 17:40:00 +02:00
|
|
|
}
|
|
|
|
if (depth + 1 == keyword.size())
|
|
|
|
j->second.mKeyword = value;
|
|
|
|
else // depth+1 < keyword.size()
|
2021-10-10 16:15:40 +00:00
|
|
|
seed_impl(std::move(keyword), std::move(value), depth + 1, j->second);
|
2013-05-04 14:15:47 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Entry mRoot;
|
|
|
|
};
|
|
|
|
|
2013-05-06 15:14:39 +02:00
|
|
|
}
|
|
|
|
|
2013-05-04 14:15:47 +02:00
|
|
|
#endif
|