1
0
mirror of https://gitlab.com/OpenMW/openmw.git synced 2025-03-25 16:43:33 +00:00

HyperTextParser as a class with proper keyword search caching

This commit is contained in:
myrix 2021-12-19 14:00:49 +03:00
parent 0688f55171
commit 3a9cfbfa53
6 changed files with 109 additions and 77 deletions

@ -43,7 +43,6 @@
#include "../mwmechanics/actorutil.hpp"
#include "filter.hpp"
#include "hypertextparser.hpp"
namespace MWDialogue
{
@ -80,7 +79,7 @@ namespace MWDialogue
{
std::vector<std::string> topicIdList;
std::vector<HyperTextParser::Token> hypertext = HyperTextParser::parseHyperText(text);
std::vector<HyperTextParser::Token> hypertext = mHyperTextParser.parseHyperText(text);
for (std::vector<HyperTextParser::Token>::iterator tok = hypertext.begin(); tok != hypertext.end(); ++tok)
{

@ -16,6 +16,8 @@
#include "../mwscript/compilercontext.hpp"
#include "hypertextparser.hpp"
namespace ESM
{
struct Dialogue;
@ -57,6 +59,8 @@ namespace MWDialogue
int mCurrentDisposition;
int mPermanentDispositionChange;
HyperTextParser mHyperTextParser;
std::vector<std::string> parseTopicIdsFromText (const std::string& text);
void addTopicsFromText (const std::string& text);

@ -6,95 +6,89 @@
#include "../mwworld/store.hpp"
#include "../mwworld/esmstore.hpp"
#include "keywordsearch.hpp"
#include "hypertextparser.hpp"
namespace MWDialogue
{
namespace HyperTextParser
std::vector<HyperTextParser::Token> HyperTextParser::parseHyperText(const std::string & text)
{
std::vector<Token> parseHyperText(const std::string & text)
std::vector<Token> result;
size_t pos_end = std::string::npos, iteration_pos = 0;
for(;;)
{
std::vector<Token> result;
size_t pos_end = std::string::npos, iteration_pos = 0;
for(;;)
size_t pos_begin = text.find('@', iteration_pos);
if (pos_begin != std::string::npos)
pos_end = text.find('#', pos_begin);
if (pos_begin != std::string::npos && pos_end != std::string::npos)
{
size_t pos_begin = text.find('@', iteration_pos);
if (pos_begin != std::string::npos)
pos_end = text.find('#', pos_begin);
if (pos_begin != iteration_pos)
tokenizeKeywords(text.substr(iteration_pos, pos_begin - iteration_pos), result);
if (pos_begin != std::string::npos && pos_end != std::string::npos)
{
if (pos_begin != iteration_pos)
tokenizeKeywords(text.substr(iteration_pos, pos_begin - iteration_pos), result);
std::string link = text.substr(pos_begin + 1, pos_end - pos_begin - 1);
result.emplace_back(link, Token::ExplicitLink);
std::string link = text.substr(pos_begin + 1, pos_end - pos_begin - 1);
result.emplace_back(link, Token::ExplicitLink);
iteration_pos = pos_end + 1;
}
else
{
if (iteration_pos != text.size())
tokenizeKeywords(text.substr(iteration_pos), result);
break;
}
iteration_pos = pos_end + 1;
}
return result;
}
void tokenizeKeywords(const std::string & text, std::vector<Token> & tokens)
{
static bool keywordSearchInitialized = false;
static KeywordSearch<std::string, int /*unused*/> keywordSearch;
if (!keywordSearchInitialized)
else
{
const MWWorld::Store<ESM::Dialogue> & dialogs =
MWBase::Environment::get().getWorld()->getStore().get<ESM::Dialogue>();
std::vector<std::string> keywordList;
keywordList.reserve(dialogs.getSize());
for (const auto& it : dialogs)
keywordList.push_back(Misc::StringUtils::lowerCase(it.mId));
sort(keywordList.begin(), keywordList.end());
for (const auto& it : keywordList)
keywordSearch.seed(it, 0 /*unused*/);
keywordSearchInitialized = true;
}
std::vector<KeywordSearch<std::string, int /*unused*/>::Match> matches;
keywordSearch.highlightKeywords(text.begin(), text.end(), matches);
for (std::vector<KeywordSearch<std::string, int /*unused*/>::Match>::const_iterator it = matches.begin(); it != matches.end(); ++it)
{
tokens.emplace_back(std::string(it->mBeg, it->mEnd), Token::ImplicitKeyword);
if (iteration_pos != text.size())
tokenizeKeywords(text.substr(iteration_pos), result);
break;
}
}
size_t removePseudoAsterisks(std::string & phrase)
return result;
}
void HyperTextParser::tokenizeKeywords(const std::string & text, std::vector<Token> & tokens)
{
const MWWorld::Store<ESM::Dialogue> & dialogs =
MWBase::Environment::get().getWorld()->getStore().get<ESM::Dialogue>();
if (dialogs.getModPoint() != mKeywordModPoint)
{
size_t pseudoAsterisksCount = 0;
mKeywordSearch.clear();
if( !phrase.empty() )
{
std::string::reverse_iterator rit = phrase.rbegin();
std::vector<std::string> keywordList;
keywordList.reserve(dialogs.getSize());
for (const auto& it : dialogs)
keywordList.push_back(Misc::StringUtils::lowerCase(it.mId));
sort(keywordList.begin(), keywordList.end());
const char specialPseudoAsteriskCharacter = 127;
while( rit != phrase.rend() && *rit == specialPseudoAsteriskCharacter )
{
pseudoAsterisksCount++;
++rit;
}
}
for (const auto& it : keywordList)
mKeywordSearch.seed(it, 0 /*unused*/);
phrase = phrase.substr(0, phrase.length() - pseudoAsterisksCount);
mKeywordModPoint = dialogs.getModPoint();
}
return pseudoAsterisksCount;
std::vector<KeywordSearch<std::string, int /*unused*/>::Match> matches;
mKeywordSearch.highlightKeywords(text.begin(), text.end(), matches);
for (std::vector<KeywordSearch<std::string, int /*unused*/>::Match>::const_iterator it = matches.begin(); it != matches.end(); ++it)
{
tokens.emplace_back(std::string(it->mBeg, it->mEnd), Token::ImplicitKeyword);
}
}
// Strips every trailing character with value 127 (the "pseudo asterisk"
// marker) from `phrase` in place and returns how many were removed.
size_t HyperTextParser::removePseudoAsterisks(std::string & phrase)
{
    const char pseudoAsterisk = 127;

    // Index of the last character that is not a pseudo asterisk; npos when the
    // phrase is empty or consists solely of pseudo asterisks.
    const std::string::size_type lastKept = phrase.find_last_not_of(pseudoAsterisk);
    const size_t keptLength = (lastKept == std::string::npos) ? 0 : lastKept + 1;
    const size_t strippedCount = phrase.length() - keptLength;

    phrase.resize(keptLength);
    return strippedCount;
}
}

@ -4,10 +4,17 @@
#include <string>
#include <vector>
#include "keywordsearch.hpp"
namespace MWDialogue
{
namespace HyperTextParser
class HyperTextParser
{
uint64_t mKeywordModPoint;
KeywordSearch<std::string, int /*unused*/> mKeywordSearch;
public:
struct Token
{
enum Type
@ -24,12 +31,14 @@ namespace MWDialogue
Type mType;
};
HyperTextParser() : mKeywordModPoint(0) {}
// In translations (at least Russian) links are marked explicitly as @link#,
// so a dedicated function is needed to parse them
std::vector<Token> parseHyperText(const std::string & text);
void tokenizeKeywords(const std::string & text, std::vector<Token> & tokens);
size_t removePseudoAsterisks(std::string & phrase);
}
static size_t removePseudoAsterisks(std::string & phrase);
};
}
#endif

@ -77,12 +77,13 @@ namespace MWWorld
// Default constructor: the modification counter starts at 1.
template<typename T>
Store<T>::Store()
: mModPoint(1)
{
}
// Copy constructor: only mStatic is copied from `orig`. The modification
// counter is set to one past the source's value.
template<typename T>
Store<T>::Store(const Store<T>& orig)
    : mStatic(orig.mStatic), mModPoint(orig.mModPoint + 1)
{
}
@ -93,6 +94,8 @@ namespace MWWorld
assert(mShared.size() >= mStatic.size());
mShared.erase(mShared.begin() + mStatic.size(), mShared.end());
mDynamic.clear();
mModPoint++;
}
template<typename T>
@ -162,6 +165,8 @@ namespace MWWorld
if (inserted.second)
mShared.push_back(&inserted.first->second);
mModPoint++;
return RecordId(record.mId, isDeleted);
}
template<typename T>
@ -213,6 +218,9 @@ namespace MWWorld
T *ptr = &result.first->second;
if (result.second)
mShared.push_back(ptr);
mModPoint++;
return ptr;
}
template<typename T>
@ -222,6 +230,9 @@ namespace MWWorld
T *ptr = &result.first->second;
if (result.second)
mShared.push_back(ptr);
mModPoint++;
return ptr;
}
template<typename T>
@ -242,6 +253,8 @@ namespace MWWorld
++sharedIter;
}
mStatic.erase(it);
mModPoint++;
}
return true;
@ -259,6 +272,9 @@ namespace MWWorld
for (auto it = mDynamic.begin(); it != mDynamic.end(); ++it) {
mShared.push_back(&it->second);
}
mModPoint++;
return true;
}
template<typename T>
@ -997,6 +1013,8 @@ namespace MWWorld
// TODO: verify and document this inconsistent behaviour
// TODO: if we require this behaviour, maybe we should move it to the place that requires it
std::sort(mShared.begin(), mShared.end(), [](const ESM::Dialogue* l, const ESM::Dialogue* r) -> bool { return l->mId < r->mId; });
mModPoint++;
}
template <>
@ -1018,6 +1036,8 @@ namespace MWWorld
found->second.loadData(esm, isDeleted);
dialogue.mId = found->second.mId;
}
mModPoint++;
return RecordId(dialogue.mId, isDeleted);
}
@ -1025,7 +1045,9 @@ namespace MWWorld
// Removes the static dialogue record with the given id, if there is one.
// The modification counter is bumped only when a record was actually erased.
// Always returns true, whether or not a record was found.
template<>
bool Store<ESM::Dialogue>::eraseStatic(const std::string &id)
{
    // erase(key) reports how many elements it removed. It must be called
    // exactly once: an earlier unconditional erase would make this check see 0.
    if (mStatic.erase(id) != 0)
        ++mModPoint;

    return true;
}

@ -159,6 +159,8 @@ namespace MWWorld
typedef std::unordered_map<std::string, T, Misc::StringUtils::CiHash, Misc::StringUtils::CiEqual> Dynamic;
Dynamic mDynamic;
uint64_t mModPoint;
friend class ESMStore;
public:
@ -203,6 +205,8 @@ namespace MWWorld
RecordId load(ESM::ESMReader &esm) override;
void write(ESM::ESMWriter& writer, Loading::Listener& progress) const override;
RecordId read(ESM::ESMReader& reader, bool overrideOnly = false) override;
uint64_t getModPoint() const { return mModPoint; }
};
template <>