1
0
mirror of https://gitlab.com/OpenMW/openmw.git synced 2025-04-07 13:20:25 +00:00

HyperTextParser as a class with proper keyword search caching

This commit is contained in:
myrix 2021-12-19 14:00:49 +03:00
parent 0688f55171
commit 3a9cfbfa53
6 changed files with 109 additions and 77 deletions

View File

@ -43,7 +43,6 @@
#include "../mwmechanics/actorutil.hpp" #include "../mwmechanics/actorutil.hpp"
#include "filter.hpp" #include "filter.hpp"
#include "hypertextparser.hpp"
namespace MWDialogue namespace MWDialogue
{ {
@ -80,7 +79,7 @@ namespace MWDialogue
{ {
std::vector<std::string> topicIdList; std::vector<std::string> topicIdList;
std::vector<HyperTextParser::Token> hypertext = HyperTextParser::parseHyperText(text); std::vector<HyperTextParser::Token> hypertext = mHyperTextParser.parseHyperText(text);
for (std::vector<HyperTextParser::Token>::iterator tok = hypertext.begin(); tok != hypertext.end(); ++tok) for (std::vector<HyperTextParser::Token>::iterator tok = hypertext.begin(); tok != hypertext.end(); ++tok)
{ {

View File

@ -16,6 +16,8 @@
#include "../mwscript/compilercontext.hpp" #include "../mwscript/compilercontext.hpp"
#include "hypertextparser.hpp"
namespace ESM namespace ESM
{ {
struct Dialogue; struct Dialogue;
@ -57,6 +59,8 @@ namespace MWDialogue
int mCurrentDisposition; int mCurrentDisposition;
int mPermanentDispositionChange; int mPermanentDispositionChange;
HyperTextParser mHyperTextParser;
std::vector<std::string> parseTopicIdsFromText (const std::string& text); std::vector<std::string> parseTopicIdsFromText (const std::string& text);
void addTopicsFromText (const std::string& text); void addTopicsFromText (const std::string& text);

View File

@ -6,95 +6,89 @@
#include "../mwworld/store.hpp" #include "../mwworld/store.hpp"
#include "../mwworld/esmstore.hpp" #include "../mwworld/esmstore.hpp"
#include "keywordsearch.hpp"
#include "hypertextparser.hpp" #include "hypertextparser.hpp"
namespace MWDialogue namespace MWDialogue
{ {
namespace HyperTextParser std::vector<HyperTextParser::Token> HyperTextParser::parseHyperText(const std::string & text)
{ {
std::vector<Token> parseHyperText(const std::string & text) std::vector<Token> result;
size_t pos_end = std::string::npos, iteration_pos = 0;
for(;;)
{ {
std::vector<Token> result; size_t pos_begin = text.find('@', iteration_pos);
size_t pos_end = std::string::npos, iteration_pos = 0; if (pos_begin != std::string::npos)
for(;;) pos_end = text.find('#', pos_begin);
if (pos_begin != std::string::npos && pos_end != std::string::npos)
{ {
size_t pos_begin = text.find('@', iteration_pos); if (pos_begin != iteration_pos)
if (pos_begin != std::string::npos) tokenizeKeywords(text.substr(iteration_pos, pos_begin - iteration_pos), result);
pos_end = text.find('#', pos_begin);
if (pos_begin != std::string::npos && pos_end != std::string::npos) std::string link = text.substr(pos_begin + 1, pos_end - pos_begin - 1);
{ result.emplace_back(link, Token::ExplicitLink);
if (pos_begin != iteration_pos)
tokenizeKeywords(text.substr(iteration_pos, pos_begin - iteration_pos), result);
std::string link = text.substr(pos_begin + 1, pos_end - pos_begin - 1); iteration_pos = pos_end + 1;
result.emplace_back(link, Token::ExplicitLink);
iteration_pos = pos_end + 1;
}
else
{
if (iteration_pos != text.size())
tokenizeKeywords(text.substr(iteration_pos), result);
break;
}
} }
else
return result;
}
void tokenizeKeywords(const std::string & text, std::vector<Token> & tokens)
{
static bool keywordSearchInitialized = false;
static KeywordSearch<std::string, int /*unused*/> keywordSearch;
if (!keywordSearchInitialized)
{ {
const MWWorld::Store<ESM::Dialogue> & dialogs = if (iteration_pos != text.size())
MWBase::Environment::get().getWorld()->getStore().get<ESM::Dialogue>(); tokenizeKeywords(text.substr(iteration_pos), result);
break;
std::vector<std::string> keywordList;
keywordList.reserve(dialogs.getSize());
for (const auto& it : dialogs)
keywordList.push_back(Misc::StringUtils::lowerCase(it.mId));
sort(keywordList.begin(), keywordList.end());
for (const auto& it : keywordList)
keywordSearch.seed(it, 0 /*unused*/);
keywordSearchInitialized = true;
}
std::vector<KeywordSearch<std::string, int /*unused*/>::Match> matches;
keywordSearch.highlightKeywords(text.begin(), text.end(), matches);
for (std::vector<KeywordSearch<std::string, int /*unused*/>::Match>::const_iterator it = matches.begin(); it != matches.end(); ++it)
{
tokens.emplace_back(std::string(it->mBeg, it->mEnd), Token::ImplicitKeyword);
} }
} }
size_t removePseudoAsterisks(std::string & phrase) return result;
}
void HyperTextParser::tokenizeKeywords(const std::string & text, std::vector<Token> & tokens)
{
const MWWorld::Store<ESM::Dialogue> & dialogs =
MWBase::Environment::get().getWorld()->getStore().get<ESM::Dialogue>();
if (dialogs.getModPoint() != mKeywordModPoint)
{ {
size_t pseudoAsterisksCount = 0; mKeywordSearch.clear();
if( !phrase.empty() ) std::vector<std::string> keywordList;
{ keywordList.reserve(dialogs.getSize());
std::string::reverse_iterator rit = phrase.rbegin(); for (const auto& it : dialogs)
keywordList.push_back(Misc::StringUtils::lowerCase(it.mId));
sort(keywordList.begin(), keywordList.end());
const char specialPseudoAsteriskCharacter = 127; for (const auto& it : keywordList)
while( rit != phrase.rend() && *rit == specialPseudoAsteriskCharacter ) mKeywordSearch.seed(it, 0 /*unused*/);
{
pseudoAsterisksCount++;
++rit;
}
}
phrase = phrase.substr(0, phrase.length() - pseudoAsterisksCount); mKeywordModPoint = dialogs.getModPoint();
}
return pseudoAsterisksCount; std::vector<KeywordSearch<std::string, int /*unused*/>::Match> matches;
mKeywordSearch.highlightKeywords(text.begin(), text.end(), matches);
for (std::vector<KeywordSearch<std::string, int /*unused*/>::Match>::const_iterator it = matches.begin(); it != matches.end(); ++it)
{
tokens.emplace_back(std::string(it->mBeg, it->mEnd), Token::ImplicitKeyword);
} }
} }
// Strips every trailing pseudo-asterisk marker (character code 127) from
// the end of `phrase`, modifying it in place.
// Returns the number of marker characters that were removed; 0 when the
// phrase is empty or does not end with a marker.
size_t HyperTextParser::removePseudoAsterisks(std::string & phrase)
{
    const char pseudoAsterisk = 127;

    // Position of the last character that is NOT a marker. npos means the
    // phrase is empty or made up of markers only, so nothing is kept.
    const size_t lastKept = phrase.find_last_not_of(pseudoAsterisk);
    const size_t keptLength = (lastKept == std::string::npos) ? 0 : lastKept + 1;

    const size_t removedCount = phrase.length() - keptLength;
    phrase.resize(keptLength);
    return removedCount;
}
} }

View File

@ -4,10 +4,17 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "keywordsearch.hpp"
namespace MWDialogue namespace MWDialogue
{ {
namespace HyperTextParser class HyperTextParser
{ {
uint64_t mKeywordModPoint;
KeywordSearch<std::string, int /*unused*/> mKeywordSearch;
public:
struct Token struct Token
{ {
enum Type enum Type
@ -24,12 +31,14 @@ namespace MWDialogue
Type mType; Type mType;
}; };
HyperTextParser() : mKeywordModPoint(0) {}
// In translations (at least Russian) the links are marked with @#, so // In translations (at least Russian) the links are marked with @#, so
// it should be a function to parse it // it should be a function to parse it
std::vector<Token> parseHyperText(const std::string & text); std::vector<Token> parseHyperText(const std::string & text);
void tokenizeKeywords(const std::string & text, std::vector<Token> & tokens); void tokenizeKeywords(const std::string & text, std::vector<Token> & tokens);
size_t removePseudoAsterisks(std::string & phrase); static size_t removePseudoAsterisks(std::string & phrase);
} };
} }
#endif #endif

View File

@ -77,12 +77,13 @@ namespace MWWorld
template<typename T> template<typename T>
Store<T>::Store() Store<T>::Store()
: mModPoint(1)
{ {
} }
template<typename T> template<typename T>
Store<T>::Store(const Store<T>& orig) Store<T>::Store(const Store<T>& orig)
: mStatic(orig.mStatic) : mStatic(orig.mStatic), mModPoint(orig.mModPoint + 1)
{ {
} }
@ -93,6 +94,8 @@ namespace MWWorld
assert(mShared.size() >= mStatic.size()); assert(mShared.size() >= mStatic.size());
mShared.erase(mShared.begin() + mStatic.size(), mShared.end()); mShared.erase(mShared.begin() + mStatic.size(), mShared.end());
mDynamic.clear(); mDynamic.clear();
mModPoint++;
} }
template<typename T> template<typename T>
@ -162,6 +165,8 @@ namespace MWWorld
if (inserted.second) if (inserted.second)
mShared.push_back(&inserted.first->second); mShared.push_back(&inserted.first->second);
mModPoint++;
return RecordId(record.mId, isDeleted); return RecordId(record.mId, isDeleted);
} }
template<typename T> template<typename T>
@ -213,6 +218,9 @@ namespace MWWorld
T *ptr = &result.first->second; T *ptr = &result.first->second;
if (result.second) if (result.second)
mShared.push_back(ptr); mShared.push_back(ptr);
mModPoint++;
return ptr; return ptr;
} }
template<typename T> template<typename T>
@ -222,6 +230,9 @@ namespace MWWorld
T *ptr = &result.first->second; T *ptr = &result.first->second;
if (result.second) if (result.second)
mShared.push_back(ptr); mShared.push_back(ptr);
mModPoint++;
return ptr; return ptr;
} }
template<typename T> template<typename T>
@ -242,6 +253,8 @@ namespace MWWorld
++sharedIter; ++sharedIter;
} }
mStatic.erase(it); mStatic.erase(it);
mModPoint++;
} }
return true; return true;
@ -259,6 +272,9 @@ namespace MWWorld
for (auto it = mDynamic.begin(); it != mDynamic.end(); ++it) { for (auto it = mDynamic.begin(); it != mDynamic.end(); ++it) {
mShared.push_back(&it->second); mShared.push_back(&it->second);
} }
mModPoint++;
return true; return true;
} }
template<typename T> template<typename T>
@ -997,6 +1013,8 @@ namespace MWWorld
// TODO: verify and document this inconsistent behaviour // TODO: verify and document this inconsistent behaviour
// TODO: if we require this behaviour, maybe we should move it to the place that requires it // TODO: if we require this behaviour, maybe we should move it to the place that requires it
std::sort(mShared.begin(), mShared.end(), [](const ESM::Dialogue* l, const ESM::Dialogue* r) -> bool { return l->mId < r->mId; }); std::sort(mShared.begin(), mShared.end(), [](const ESM::Dialogue* l, const ESM::Dialogue* r) -> bool { return l->mId < r->mId; });
mModPoint++;
} }
template <> template <>
@ -1019,13 +1037,17 @@ namespace MWWorld
dialogue.mId = found->second.mId; dialogue.mId = found->second.mId;
} }
mModPoint++;
return RecordId(dialogue.mId, isDeleted); return RecordId(dialogue.mId, isDeleted);
} }
template<> template<>
bool Store<ESM::Dialogue>::eraseStatic(const std::string &id) bool Store<ESM::Dialogue>::eraseStatic(const std::string &id)
{ {
mStatic.erase(id); if (mStatic.erase(id))
mModPoint++;
return true; return true;
} }

View File

@ -159,6 +159,8 @@ namespace MWWorld
typedef std::unordered_map<std::string, T, Misc::StringUtils::CiHash, Misc::StringUtils::CiEqual> Dynamic; typedef std::unordered_map<std::string, T, Misc::StringUtils::CiHash, Misc::StringUtils::CiEqual> Dynamic;
Dynamic mDynamic; Dynamic mDynamic;
uint64_t mModPoint;
friend class ESMStore; friend class ESMStore;
public: public:
@ -203,6 +205,8 @@ namespace MWWorld
RecordId load(ESM::ESMReader &esm) override; RecordId load(ESM::ESMReader &esm) override;
void write(ESM::ESMWriter& writer, Loading::Listener& progress) const override; void write(ESM::ESMWriter& writer, Loading::Listener& progress) const override;
RecordId read(ESM::ESMReader& reader, bool overrideOnly = false) override; RecordId read(ESM::ESMReader& reader, bool overrideOnly = false) override;
uint64_t getModPoint() const { return mModPoint; }
}; };
template <> template <>