1
0
mirror of https://gitlab.com/OpenMW/openmw.git synced 2025-02-11 06:40:34 +00:00
OpenMW/apps/openmw/mwdialogue/keywordsearch.hpp

242 lines
8.3 KiB
C++
Raw Normal View History

#ifndef GAME_MWDIALOGUE_KEYWORDSEARCH_H
#define GAME_MWDIALOGUE_KEYWORDSEARCH_H
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
#include <algorithm> // std::reverse
#include <cctype>
2021-09-20 21:01:28 +02:00
#include <map>
2013-05-04 14:15:47 +02:00
#include <stdexcept>
#include <vector>
2013-05-04 14:15:47 +02:00
#include <components/misc/strings/algorithm.hpp>
#include <components/misc/strings/lower.hpp>
2013-05-06 15:14:39 +02:00
namespace MWDialogue
2013-05-06 15:14:39 +02:00
{
template <typename value_t>
2022-09-22 21:26:05 +03:00
class KeywordSearch
2013-05-04 14:15:47 +02:00
{
2022-09-22 21:26:05 +03:00
public:
using Point = std::string::const_iterator;
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
struct Match
{
Point mBeg;
Point mEnd;
value_t mValue;
};
2013-05-04 14:15:47 +02:00
void seed(std::string_view keyword, value_t value)
2022-09-22 21:26:05 +03:00
{
if (keyword.empty())
return;
seed_impl(keyword, std::move(value), 0, mRoot);
2022-09-22 21:26:05 +03:00
}
2013-05-06 15:14:39 +02:00
2022-09-22 21:26:05 +03:00
void clear()
2013-05-06 15:14:39 +02:00
{
2022-09-22 21:26:05 +03:00
mRoot.mChildren.clear();
mRoot.mKeyword.clear();
2013-05-06 15:14:39 +02:00
}
bool containsKeyword(std::string_view keyword, value_t& value)
2013-05-06 15:14:39 +02:00
{
auto it = keyword.begin();
auto current = mRoot.mChildren.find(Misc::StringUtils::toLower(*it));
2022-09-22 21:26:05 +03:00
if (current == mRoot.mChildren.end())
2013-05-06 15:14:39 +02:00
return false;
else if (Misc::StringUtils::ciEqual(current->second.mKeyword, keyword))
2013-05-06 15:14:39 +02:00
{
2022-09-22 21:26:05 +03:00
value = current->second.mValue;
2013-05-06 15:14:39 +02:00
return true;
}
for (++it; it != keyword.end(); ++it)
2022-09-22 21:26:05 +03:00
{
auto next = current->second.mChildren.find(Misc::StringUtils::toLower(*it));
2022-09-22 21:26:05 +03:00
if (next == current->second.mChildren.end())
return false;
if (Misc::StringUtils::ciEqual(next->second.mKeyword, keyword))
{
value = next->second.mValue;
return true;
}
current = next;
}
return false;
}
2022-09-22 21:26:05 +03:00
static bool sortMatches(const Match& left, const Match& right) { return left.mBeg < right.mBeg; }
2022-09-22 21:26:05 +03:00
void highlightKeywords(Point beg, Point end, std::vector<Match>& out) const
2013-05-04 14:15:47 +02:00
{
2022-09-22 21:26:05 +03:00
std::vector<Match> matches;
for (Point i = beg; i != end; ++i)
{
// check first character
typename Entry::childen_t::const_iterator candidate
= mRoot.mChildren.find(Misc::StringUtils::toLower(*i));
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
// no match, on to next character
if (candidate == mRoot.mChildren.end())
continue;
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
// see how far the match goes
Point j = i;
2022-09-22 21:26:05 +03:00
// some keywords might be longer variations of other keywords, so we definitely need a list of
// candidates the first element in the pair is length of the match, i.e. depth from the first character
// on
2024-03-23 00:33:50 +00:00
std::vector<typename std::pair<std::ptrdiff_t, typename Entry::childen_t::const_iterator>> candidates;
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
while ((j + 1) != end)
{
2022-09-22 21:26:05 +03:00
typename Entry::childen_t::const_iterator next
= candidate->second.mChildren.find(Misc::StringUtils::toLower(*++j));
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
if (next == candidate->second.mChildren.end())
{
if (candidate->second.mKeyword.size() > 0)
candidates.push_back(std::make_pair((j - i), candidate));
break;
}
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
candidate = next;
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
if (candidate->second.mKeyword.size() > 0)
candidates.push_back(std::make_pair((j - i), candidate));
}
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
if (candidates.empty())
continue; // didn't match enough to disambiguate, on to next character
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
// shorter candidates will be added to the vector first. however, we want to check against longer
// candidates first
std::reverse(candidates.begin(), candidates.end());
2013-05-04 14:15:47 +02:00
for (const auto& [pos, c] : candidates)
{
candidate = c;
2022-09-22 21:26:05 +03:00
// try to match the rest of the keyword
Point k = i + pos;
Point t = candidate->second.mKeyword.begin() + (k - i);
2022-09-22 21:26:05 +03:00
while (k != end && t != candidate->second.mKeyword.end())
{
if (Misc::StringUtils::toLower(*k) != Misc::StringUtils::toLower(*t))
break;
++k, ++t;
}
// didn't match full keyword, try the next candidate
if (t != candidate->second.mKeyword.end())
continue;
// found a keyword, but there might still be longer keywords that start somewhere _within_ this
// keyword we will resolve these overlapping keywords later, choosing the longest one in case of
// conflict
Match match;
match.mValue = candidate->second.mValue;
match.mBeg = i;
match.mEnd = k;
matches.push_back(match);
break;
}
}
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
// resolve overlapping keywords
while (!matches.empty())
{
std::size_t longestKeywordSize = 0;
2022-09-22 21:26:05 +03:00
typename std::vector<Match>::iterator longestKeyword = matches.begin();
for (typename std::vector<Match>::iterator it = matches.begin(); it != matches.end(); ++it)
{
std::size_t size = it->mEnd - it->mBeg;
2022-09-22 21:26:05 +03:00
if (size > longestKeywordSize)
{
longestKeywordSize = size;
longestKeyword = it;
}
2022-09-22 21:26:05 +03:00
typename std::vector<Match>::iterator next = it;
++next;
2022-09-22 21:26:05 +03:00
if (next == matches.end())
break;
2022-09-22 21:26:05 +03:00
if (it->mEnd <= next->mBeg)
{
break; // no overlap
}
}
Match keyword = *longestKeyword;
matches.erase(longestKeyword);
out.push_back(keyword);
// erase anything that overlaps with the keyword we just added to the output
for (typename std::vector<Match>::iterator it = matches.begin(); it != matches.end();)
{
2022-09-22 21:26:05 +03:00
if (it->mBeg < keyword.mEnd && it->mEnd > keyword.mBeg)
it = matches.erase(it);
else
++it;
}
}
2022-09-22 21:26:05 +03:00
std::sort(out.begin(), out.end(), sortMatches);
}
2022-09-22 21:26:05 +03:00
private:
struct Entry
{
typedef std::map<wchar_t, Entry> childen_t;
2013-05-04 14:15:47 +02:00
std::string mKeyword;
2022-09-22 21:26:05 +03:00
value_t mValue;
childen_t mChildren;
};
2013-05-04 14:15:47 +02:00
void seed_impl(std::string_view keyword, value_t value, size_t depth, Entry& entry)
2022-09-22 21:26:05 +03:00
{
auto ch = Misc::StringUtils::toLower(keyword.at(depth));
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
typename Entry::childen_t::iterator j = entry.mChildren.find(ch);
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
if (j == entry.mChildren.end())
{
entry.mChildren[ch].mValue = std::move(value);
entry.mChildren[ch].mKeyword = keyword;
2022-09-22 21:26:05 +03:00
}
else
2013-05-04 14:15:47 +02:00
{
2022-09-22 21:26:05 +03:00
if (j->second.mKeyword.size() > 0)
{
if (keyword == j->second.mKeyword)
throw std::runtime_error("duplicate keyword inserted");
2013-05-04 14:15:47 +02:00
const auto& pushKeyword = j->second.mKeyword;
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
if (depth >= pushKeyword.size())
throw std::runtime_error("unexpected");
2013-05-04 14:15:47 +02:00
2022-09-22 21:26:05 +03:00
if (depth + 1 < pushKeyword.size())
{
seed_impl(pushKeyword, j->second.mValue, depth + 1, j->second);
2022-09-22 21:26:05 +03:00
j->second.mKeyword.clear();
}
}
2022-09-22 21:26:05 +03:00
if (depth + 1 == keyword.size())
j->second.mKeyword = value;
else // depth+1 < keyword.size()
seed_impl(keyword, std::move(value), depth + 1, j->second);
2013-05-04 14:15:47 +02:00
}
}
2022-09-22 21:26:05 +03:00
Entry mRoot;
};
2013-05-04 14:15:47 +02:00
2013-05-06 15:14:39 +02:00
}
2013-05-04 14:15:47 +02:00
#endif