mirror of
https://gitlab.com/OpenMW/openmw.git
synced 2025-03-25 16:43:33 +00:00
HyperTextParser as a class with proper keyword search caching
This commit is contained in:
parent
0688f55171
commit
3a9cfbfa53
apps/openmw
@ -43,7 +43,6 @@
|
||||
#include "../mwmechanics/actorutil.hpp"
|
||||
|
||||
#include "filter.hpp"
|
||||
#include "hypertextparser.hpp"
|
||||
|
||||
namespace MWDialogue
|
||||
{
|
||||
@ -80,7 +79,7 @@ namespace MWDialogue
|
||||
{
|
||||
std::vector<std::string> topicIdList;
|
||||
|
||||
std::vector<HyperTextParser::Token> hypertext = HyperTextParser::parseHyperText(text);
|
||||
std::vector<HyperTextParser::Token> hypertext = mHyperTextParser.parseHyperText(text);
|
||||
|
||||
for (std::vector<HyperTextParser::Token>::iterator tok = hypertext.begin(); tok != hypertext.end(); ++tok)
|
||||
{
|
||||
|
@ -16,6 +16,8 @@
|
||||
|
||||
#include "../mwscript/compilercontext.hpp"
|
||||
|
||||
#include "hypertextparser.hpp"
|
||||
|
||||
namespace ESM
|
||||
{
|
||||
struct Dialogue;
|
||||
@ -57,6 +59,8 @@ namespace MWDialogue
|
||||
int mCurrentDisposition;
|
||||
int mPermanentDispositionChange;
|
||||
|
||||
HyperTextParser mHyperTextParser;
|
||||
|
||||
std::vector<std::string> parseTopicIdsFromText (const std::string& text);
|
||||
void addTopicsFromText (const std::string& text);
|
||||
|
||||
|
@ -6,95 +6,89 @@
|
||||
#include "../mwworld/store.hpp"
|
||||
#include "../mwworld/esmstore.hpp"
|
||||
|
||||
#include "keywordsearch.hpp"
|
||||
|
||||
#include "hypertextparser.hpp"
|
||||
|
||||
namespace MWDialogue
|
||||
{
|
||||
namespace HyperTextParser
|
||||
std::vector<HyperTextParser::Token> HyperTextParser::parseHyperText(const std::string & text)
|
||||
{
|
||||
std::vector<Token> parseHyperText(const std::string & text)
|
||||
std::vector<Token> result;
|
||||
size_t pos_end = std::string::npos, iteration_pos = 0;
|
||||
for(;;)
|
||||
{
|
||||
std::vector<Token> result;
|
||||
size_t pos_end = std::string::npos, iteration_pos = 0;
|
||||
for(;;)
|
||||
size_t pos_begin = text.find('@', iteration_pos);
|
||||
if (pos_begin != std::string::npos)
|
||||
pos_end = text.find('#', pos_begin);
|
||||
|
||||
if (pos_begin != std::string::npos && pos_end != std::string::npos)
|
||||
{
|
||||
size_t pos_begin = text.find('@', iteration_pos);
|
||||
if (pos_begin != std::string::npos)
|
||||
pos_end = text.find('#', pos_begin);
|
||||
if (pos_begin != iteration_pos)
|
||||
tokenizeKeywords(text.substr(iteration_pos, pos_begin - iteration_pos), result);
|
||||
|
||||
if (pos_begin != std::string::npos && pos_end != std::string::npos)
|
||||
{
|
||||
if (pos_begin != iteration_pos)
|
||||
tokenizeKeywords(text.substr(iteration_pos, pos_begin - iteration_pos), result);
|
||||
std::string link = text.substr(pos_begin + 1, pos_end - pos_begin - 1);
|
||||
result.emplace_back(link, Token::ExplicitLink);
|
||||
|
||||
std::string link = text.substr(pos_begin + 1, pos_end - pos_begin - 1);
|
||||
result.emplace_back(link, Token::ExplicitLink);
|
||||
|
||||
iteration_pos = pos_end + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (iteration_pos != text.size())
|
||||
tokenizeKeywords(text.substr(iteration_pos), result);
|
||||
break;
|
||||
}
|
||||
iteration_pos = pos_end + 1;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void tokenizeKeywords(const std::string & text, std::vector<Token> & tokens)
|
||||
{
|
||||
static bool keywordSearchInitialized = false;
|
||||
static KeywordSearch<std::string, int /*unused*/> keywordSearch;
|
||||
|
||||
if (!keywordSearchInitialized)
|
||||
else
|
||||
{
|
||||
const MWWorld::Store<ESM::Dialogue> & dialogs =
|
||||
MWBase::Environment::get().getWorld()->getStore().get<ESM::Dialogue>();
|
||||
|
||||
std::vector<std::string> keywordList;
|
||||
keywordList.reserve(dialogs.getSize());
|
||||
for (const auto& it : dialogs)
|
||||
keywordList.push_back(Misc::StringUtils::lowerCase(it.mId));
|
||||
sort(keywordList.begin(), keywordList.end());
|
||||
|
||||
for (const auto& it : keywordList)
|
||||
keywordSearch.seed(it, 0 /*unused*/);
|
||||
|
||||
keywordSearchInitialized = true;
|
||||
}
|
||||
|
||||
std::vector<KeywordSearch<std::string, int /*unused*/>::Match> matches;
|
||||
keywordSearch.highlightKeywords(text.begin(), text.end(), matches);
|
||||
|
||||
for (std::vector<KeywordSearch<std::string, int /*unused*/>::Match>::const_iterator it = matches.begin(); it != matches.end(); ++it)
|
||||
{
|
||||
tokens.emplace_back(std::string(it->mBeg, it->mEnd), Token::ImplicitKeyword);
|
||||
if (iteration_pos != text.size())
|
||||
tokenizeKeywords(text.substr(iteration_pos), result);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
size_t removePseudoAsterisks(std::string & phrase)
|
||||
return result;
|
||||
}
|
||||
|
||||
void HyperTextParser::tokenizeKeywords(const std::string & text, std::vector<Token> & tokens)
|
||||
{
|
||||
const MWWorld::Store<ESM::Dialogue> & dialogs =
|
||||
MWBase::Environment::get().getWorld()->getStore().get<ESM::Dialogue>();
|
||||
|
||||
if (dialogs.getModPoint() != mKeywordModPoint)
|
||||
{
|
||||
size_t pseudoAsterisksCount = 0;
|
||||
mKeywordSearch.clear();
|
||||
|
||||
if( !phrase.empty() )
|
||||
{
|
||||
std::string::reverse_iterator rit = phrase.rbegin();
|
||||
std::vector<std::string> keywordList;
|
||||
keywordList.reserve(dialogs.getSize());
|
||||
for (const auto& it : dialogs)
|
||||
keywordList.push_back(Misc::StringUtils::lowerCase(it.mId));
|
||||
sort(keywordList.begin(), keywordList.end());
|
||||
|
||||
const char specialPseudoAsteriskCharacter = 127;
|
||||
while( rit != phrase.rend() && *rit == specialPseudoAsteriskCharacter )
|
||||
{
|
||||
pseudoAsterisksCount++;
|
||||
++rit;
|
||||
}
|
||||
}
|
||||
for (const auto& it : keywordList)
|
||||
mKeywordSearch.seed(it, 0 /*unused*/);
|
||||
|
||||
phrase = phrase.substr(0, phrase.length() - pseudoAsterisksCount);
|
||||
mKeywordModPoint = dialogs.getModPoint();
|
||||
}
|
||||
|
||||
return pseudoAsterisksCount;
|
||||
std::vector<KeywordSearch<std::string, int /*unused*/>::Match> matches;
|
||||
mKeywordSearch.highlightKeywords(text.begin(), text.end(), matches);
|
||||
|
||||
for (std::vector<KeywordSearch<std::string, int /*unused*/>::Match>::const_iterator it = matches.begin(); it != matches.end(); ++it)
|
||||
{
|
||||
tokens.emplace_back(std::string(it->mBeg, it->mEnd), Token::ImplicitKeyword);
|
||||
}
|
||||
}
|
||||
|
||||
size_t HyperTextParser::removePseudoAsterisks(std::string & phrase)
|
||||
{
|
||||
size_t pseudoAsterisksCount = 0;
|
||||
|
||||
if( !phrase.empty() )
|
||||
{
|
||||
std::string::reverse_iterator rit = phrase.rbegin();
|
||||
|
||||
const char specialPseudoAsteriskCharacter = 127;
|
||||
while( rit != phrase.rend() && *rit == specialPseudoAsteriskCharacter )
|
||||
{
|
||||
pseudoAsterisksCount++;
|
||||
++rit;
|
||||
}
|
||||
}
|
||||
|
||||
phrase = phrase.substr(0, phrase.length() - pseudoAsterisksCount);
|
||||
|
||||
return pseudoAsterisksCount;
|
||||
}
|
||||
}
|
||||
|
@ -4,10 +4,17 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "keywordsearch.hpp"
|
||||
|
||||
namespace MWDialogue
|
||||
{
|
||||
namespace HyperTextParser
|
||||
class HyperTextParser
|
||||
{
|
||||
uint64_t mKeywordModPoint;
|
||||
KeywordSearch<std::string, int /*unused*/> mKeywordSearch;
|
||||
|
||||
public:
|
||||
|
||||
struct Token
|
||||
{
|
||||
enum Type
|
||||
@ -24,12 +31,14 @@ namespace MWDialogue
|
||||
Type mType;
|
||||
};
|
||||
|
||||
HyperTextParser() : mKeywordModPoint(0) {}
|
||||
|
||||
// In translations (at least Russian) the links are marked with @#, so
|
||||
// it should be a function to parse it
|
||||
std::vector<Token> parseHyperText(const std::string & text);
|
||||
void tokenizeKeywords(const std::string & text, std::vector<Token> & tokens);
|
||||
size_t removePseudoAsterisks(std::string & phrase);
|
||||
}
|
||||
static size_t removePseudoAsterisks(std::string & phrase);
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -77,12 +77,13 @@ namespace MWWorld
|
||||
|
||||
template<typename T>
|
||||
Store<T>::Store()
|
||||
: mModPoint(1)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
Store<T>::Store(const Store<T>& orig)
|
||||
: mStatic(orig.mStatic)
|
||||
: mStatic(orig.mStatic), mModPoint(orig.mModPoint + 1)
|
||||
{
|
||||
}
|
||||
|
||||
@ -93,6 +94,8 @@ namespace MWWorld
|
||||
assert(mShared.size() >= mStatic.size());
|
||||
mShared.erase(mShared.begin() + mStatic.size(), mShared.end());
|
||||
mDynamic.clear();
|
||||
|
||||
mModPoint++;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
@ -162,6 +165,8 @@ namespace MWWorld
|
||||
if (inserted.second)
|
||||
mShared.push_back(&inserted.first->second);
|
||||
|
||||
mModPoint++;
|
||||
|
||||
return RecordId(record.mId, isDeleted);
|
||||
}
|
||||
template<typename T>
|
||||
@ -213,6 +218,9 @@ namespace MWWorld
|
||||
T *ptr = &result.first->second;
|
||||
if (result.second)
|
||||
mShared.push_back(ptr);
|
||||
|
||||
mModPoint++;
|
||||
|
||||
return ptr;
|
||||
}
|
||||
template<typename T>
|
||||
@ -222,6 +230,9 @@ namespace MWWorld
|
||||
T *ptr = &result.first->second;
|
||||
if (result.second)
|
||||
mShared.push_back(ptr);
|
||||
|
||||
mModPoint++;
|
||||
|
||||
return ptr;
|
||||
}
|
||||
template<typename T>
|
||||
@ -242,6 +253,8 @@ namespace MWWorld
|
||||
++sharedIter;
|
||||
}
|
||||
mStatic.erase(it);
|
||||
|
||||
mModPoint++;
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -259,6 +272,9 @@ namespace MWWorld
|
||||
for (auto it = mDynamic.begin(); it != mDynamic.end(); ++it) {
|
||||
mShared.push_back(&it->second);
|
||||
}
|
||||
|
||||
mModPoint++;
|
||||
|
||||
return true;
|
||||
}
|
||||
template<typename T>
|
||||
@ -997,6 +1013,8 @@ namespace MWWorld
|
||||
// TODO: verify and document this inconsistent behaviour
|
||||
// TODO: if we require this behaviour, maybe we should move it to the place that requires it
|
||||
std::sort(mShared.begin(), mShared.end(), [](const ESM::Dialogue* l, const ESM::Dialogue* r) -> bool { return l->mId < r->mId; });
|
||||
|
||||
mModPoint++;
|
||||
}
|
||||
|
||||
template <>
|
||||
@ -1018,6 +1036,8 @@ namespace MWWorld
|
||||
found->second.loadData(esm, isDeleted);
|
||||
dialogue.mId = found->second.mId;
|
||||
}
|
||||
|
||||
mModPoint++;
|
||||
|
||||
return RecordId(dialogue.mId, isDeleted);
|
||||
}
|
||||
@ -1025,7 +1045,9 @@ namespace MWWorld
|
||||
template<>
|
||||
bool Store<ESM::Dialogue>::eraseStatic(const std::string &id)
|
||||
{
|
||||
mStatic.erase(id);
|
||||
if (mStatic.erase(id))
|
||||
mModPoint++;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -159,6 +159,8 @@ namespace MWWorld
|
||||
typedef std::unordered_map<std::string, T, Misc::StringUtils::CiHash, Misc::StringUtils::CiEqual> Dynamic;
|
||||
Dynamic mDynamic;
|
||||
|
||||
uint64_t mModPoint;
|
||||
|
||||
friend class ESMStore;
|
||||
|
||||
public:
|
||||
@ -203,6 +205,8 @@ namespace MWWorld
|
||||
RecordId load(ESM::ESMReader &esm) override;
|
||||
void write(ESM::ESMWriter& writer, Loading::Listener& progress) const override;
|
||||
RecordId read(ESM::ESMReader& reader, bool overrideOnly = false) override;
|
||||
|
||||
uint64_t getModPoint() const { return mModPoint; }
|
||||
};
|
||||
|
||||
template <>
|
||||
|
Loading…
x
Reference in New Issue
Block a user