mirror of
https://gitlab.com/OpenMW/openmw.git
synced 2025-04-07 13:20:25 +00:00
HyperTextParser as a class with proper keyword search caching
This commit is contained in:
parent
0688f55171
commit
3a9cfbfa53
@ -43,7 +43,6 @@
|
|||||||
#include "../mwmechanics/actorutil.hpp"
|
#include "../mwmechanics/actorutil.hpp"
|
||||||
|
|
||||||
#include "filter.hpp"
|
#include "filter.hpp"
|
||||||
#include "hypertextparser.hpp"
|
|
||||||
|
|
||||||
namespace MWDialogue
|
namespace MWDialogue
|
||||||
{
|
{
|
||||||
@ -80,7 +79,7 @@ namespace MWDialogue
|
|||||||
{
|
{
|
||||||
std::vector<std::string> topicIdList;
|
std::vector<std::string> topicIdList;
|
||||||
|
|
||||||
std::vector<HyperTextParser::Token> hypertext = HyperTextParser::parseHyperText(text);
|
std::vector<HyperTextParser::Token> hypertext = mHyperTextParser.parseHyperText(text);
|
||||||
|
|
||||||
for (std::vector<HyperTextParser::Token>::iterator tok = hypertext.begin(); tok != hypertext.end(); ++tok)
|
for (std::vector<HyperTextParser::Token>::iterator tok = hypertext.begin(); tok != hypertext.end(); ++tok)
|
||||||
{
|
{
|
||||||
|
@ -16,6 +16,8 @@
|
|||||||
|
|
||||||
#include "../mwscript/compilercontext.hpp"
|
#include "../mwscript/compilercontext.hpp"
|
||||||
|
|
||||||
|
#include "hypertextparser.hpp"
|
||||||
|
|
||||||
namespace ESM
|
namespace ESM
|
||||||
{
|
{
|
||||||
struct Dialogue;
|
struct Dialogue;
|
||||||
@ -57,6 +59,8 @@ namespace MWDialogue
|
|||||||
int mCurrentDisposition;
|
int mCurrentDisposition;
|
||||||
int mPermanentDispositionChange;
|
int mPermanentDispositionChange;
|
||||||
|
|
||||||
|
HyperTextParser mHyperTextParser;
|
||||||
|
|
||||||
std::vector<std::string> parseTopicIdsFromText (const std::string& text);
|
std::vector<std::string> parseTopicIdsFromText (const std::string& text);
|
||||||
void addTopicsFromText (const std::string& text);
|
void addTopicsFromText (const std::string& text);
|
||||||
|
|
||||||
|
@ -6,95 +6,89 @@
|
|||||||
#include "../mwworld/store.hpp"
|
#include "../mwworld/store.hpp"
|
||||||
#include "../mwworld/esmstore.hpp"
|
#include "../mwworld/esmstore.hpp"
|
||||||
|
|
||||||
#include "keywordsearch.hpp"
|
|
||||||
|
|
||||||
#include "hypertextparser.hpp"
|
#include "hypertextparser.hpp"
|
||||||
|
|
||||||
namespace MWDialogue
|
namespace MWDialogue
|
||||||
{
|
{
|
||||||
namespace HyperTextParser
|
std::vector<HyperTextParser::Token> HyperTextParser::parseHyperText(const std::string & text)
|
||||||
{
|
{
|
||||||
std::vector<Token> parseHyperText(const std::string & text)
|
std::vector<Token> result;
|
||||||
|
size_t pos_end = std::string::npos, iteration_pos = 0;
|
||||||
|
for(;;)
|
||||||
{
|
{
|
||||||
std::vector<Token> result;
|
size_t pos_begin = text.find('@', iteration_pos);
|
||||||
size_t pos_end = std::string::npos, iteration_pos = 0;
|
if (pos_begin != std::string::npos)
|
||||||
for(;;)
|
pos_end = text.find('#', pos_begin);
|
||||||
|
|
||||||
|
if (pos_begin != std::string::npos && pos_end != std::string::npos)
|
||||||
{
|
{
|
||||||
size_t pos_begin = text.find('@', iteration_pos);
|
if (pos_begin != iteration_pos)
|
||||||
if (pos_begin != std::string::npos)
|
tokenizeKeywords(text.substr(iteration_pos, pos_begin - iteration_pos), result);
|
||||||
pos_end = text.find('#', pos_begin);
|
|
||||||
|
|
||||||
if (pos_begin != std::string::npos && pos_end != std::string::npos)
|
std::string link = text.substr(pos_begin + 1, pos_end - pos_begin - 1);
|
||||||
{
|
result.emplace_back(link, Token::ExplicitLink);
|
||||||
if (pos_begin != iteration_pos)
|
|
||||||
tokenizeKeywords(text.substr(iteration_pos, pos_begin - iteration_pos), result);
|
|
||||||
|
|
||||||
std::string link = text.substr(pos_begin + 1, pos_end - pos_begin - 1);
|
iteration_pos = pos_end + 1;
|
||||||
result.emplace_back(link, Token::ExplicitLink);
|
|
||||||
|
|
||||||
iteration_pos = pos_end + 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (iteration_pos != text.size())
|
|
||||||
tokenizeKeywords(text.substr(iteration_pos), result);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
void tokenizeKeywords(const std::string & text, std::vector<Token> & tokens)
|
|
||||||
{
|
|
||||||
static bool keywordSearchInitialized = false;
|
|
||||||
static KeywordSearch<std::string, int /*unused*/> keywordSearch;
|
|
||||||
|
|
||||||
if (!keywordSearchInitialized)
|
|
||||||
{
|
{
|
||||||
const MWWorld::Store<ESM::Dialogue> & dialogs =
|
if (iteration_pos != text.size())
|
||||||
MWBase::Environment::get().getWorld()->getStore().get<ESM::Dialogue>();
|
tokenizeKeywords(text.substr(iteration_pos), result);
|
||||||
|
break;
|
||||||
std::vector<std::string> keywordList;
|
|
||||||
keywordList.reserve(dialogs.getSize());
|
|
||||||
for (const auto& it : dialogs)
|
|
||||||
keywordList.push_back(Misc::StringUtils::lowerCase(it.mId));
|
|
||||||
sort(keywordList.begin(), keywordList.end());
|
|
||||||
|
|
||||||
for (const auto& it : keywordList)
|
|
||||||
keywordSearch.seed(it, 0 /*unused*/);
|
|
||||||
|
|
||||||
keywordSearchInitialized = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<KeywordSearch<std::string, int /*unused*/>::Match> matches;
|
|
||||||
keywordSearch.highlightKeywords(text.begin(), text.end(), matches);
|
|
||||||
|
|
||||||
for (std::vector<KeywordSearch<std::string, int /*unused*/>::Match>::const_iterator it = matches.begin(); it != matches.end(); ++it)
|
|
||||||
{
|
|
||||||
tokens.emplace_back(std::string(it->mBeg, it->mEnd), Token::ImplicitKeyword);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t removePseudoAsterisks(std::string & phrase)
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void HyperTextParser::tokenizeKeywords(const std::string & text, std::vector<Token> & tokens)
|
||||||
|
{
|
||||||
|
const MWWorld::Store<ESM::Dialogue> & dialogs =
|
||||||
|
MWBase::Environment::get().getWorld()->getStore().get<ESM::Dialogue>();
|
||||||
|
|
||||||
|
if (dialogs.getModPoint() != mKeywordModPoint)
|
||||||
{
|
{
|
||||||
size_t pseudoAsterisksCount = 0;
|
mKeywordSearch.clear();
|
||||||
|
|
||||||
if( !phrase.empty() )
|
std::vector<std::string> keywordList;
|
||||||
{
|
keywordList.reserve(dialogs.getSize());
|
||||||
std::string::reverse_iterator rit = phrase.rbegin();
|
for (const auto& it : dialogs)
|
||||||
|
keywordList.push_back(Misc::StringUtils::lowerCase(it.mId));
|
||||||
|
sort(keywordList.begin(), keywordList.end());
|
||||||
|
|
||||||
const char specialPseudoAsteriskCharacter = 127;
|
for (const auto& it : keywordList)
|
||||||
while( rit != phrase.rend() && *rit == specialPseudoAsteriskCharacter )
|
mKeywordSearch.seed(it, 0 /*unused*/);
|
||||||
{
|
|
||||||
pseudoAsterisksCount++;
|
|
||||||
++rit;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
phrase = phrase.substr(0, phrase.length() - pseudoAsterisksCount);
|
mKeywordModPoint = dialogs.getModPoint();
|
||||||
|
}
|
||||||
|
|
||||||
return pseudoAsterisksCount;
|
std::vector<KeywordSearch<std::string, int /*unused*/>::Match> matches;
|
||||||
|
mKeywordSearch.highlightKeywords(text.begin(), text.end(), matches);
|
||||||
|
|
||||||
|
for (std::vector<KeywordSearch<std::string, int /*unused*/>::Match>::const_iterator it = matches.begin(); it != matches.end(); ++it)
|
||||||
|
{
|
||||||
|
tokens.emplace_back(std::string(it->mBeg, it->mEnd), Token::ImplicitKeyword);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t HyperTextParser::removePseudoAsterisks(std::string & phrase)
|
||||||
|
{
|
||||||
|
size_t pseudoAsterisksCount = 0;
|
||||||
|
|
||||||
|
if( !phrase.empty() )
|
||||||
|
{
|
||||||
|
std::string::reverse_iterator rit = phrase.rbegin();
|
||||||
|
|
||||||
|
const char specialPseudoAsteriskCharacter = 127;
|
||||||
|
while( rit != phrase.rend() && *rit == specialPseudoAsteriskCharacter )
|
||||||
|
{
|
||||||
|
pseudoAsterisksCount++;
|
||||||
|
++rit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
phrase = phrase.substr(0, phrase.length() - pseudoAsterisksCount);
|
||||||
|
|
||||||
|
return pseudoAsterisksCount;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,10 +4,17 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "keywordsearch.hpp"
|
||||||
|
|
||||||
namespace MWDialogue
|
namespace MWDialogue
|
||||||
{
|
{
|
||||||
namespace HyperTextParser
|
class HyperTextParser
|
||||||
{
|
{
|
||||||
|
uint64_t mKeywordModPoint;
|
||||||
|
KeywordSearch<std::string, int /*unused*/> mKeywordSearch;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
struct Token
|
struct Token
|
||||||
{
|
{
|
||||||
enum Type
|
enum Type
|
||||||
@ -24,12 +31,14 @@ namespace MWDialogue
|
|||||||
Type mType;
|
Type mType;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
HyperTextParser() : mKeywordModPoint(0) {}
|
||||||
|
|
||||||
// In translations (at least Russian) the links are marked with @#, so
|
// In translations (at least Russian) the links are marked with @#, so
|
||||||
// it should be a function to parse it
|
// it should be a function to parse it
|
||||||
std::vector<Token> parseHyperText(const std::string & text);
|
std::vector<Token> parseHyperText(const std::string & text);
|
||||||
void tokenizeKeywords(const std::string & text, std::vector<Token> & tokens);
|
void tokenizeKeywords(const std::string & text, std::vector<Token> & tokens);
|
||||||
size_t removePseudoAsterisks(std::string & phrase);
|
static size_t removePseudoAsterisks(std::string & phrase);
|
||||||
}
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -77,12 +77,13 @@ namespace MWWorld
|
|||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
Store<T>::Store()
|
Store<T>::Store()
|
||||||
|
: mModPoint(1)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
Store<T>::Store(const Store<T>& orig)
|
Store<T>::Store(const Store<T>& orig)
|
||||||
: mStatic(orig.mStatic)
|
: mStatic(orig.mStatic), mModPoint(orig.mModPoint + 1)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,6 +94,8 @@ namespace MWWorld
|
|||||||
assert(mShared.size() >= mStatic.size());
|
assert(mShared.size() >= mStatic.size());
|
||||||
mShared.erase(mShared.begin() + mStatic.size(), mShared.end());
|
mShared.erase(mShared.begin() + mStatic.size(), mShared.end());
|
||||||
mDynamic.clear();
|
mDynamic.clear();
|
||||||
|
|
||||||
|
mModPoint++;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@ -162,6 +165,8 @@ namespace MWWorld
|
|||||||
if (inserted.second)
|
if (inserted.second)
|
||||||
mShared.push_back(&inserted.first->second);
|
mShared.push_back(&inserted.first->second);
|
||||||
|
|
||||||
|
mModPoint++;
|
||||||
|
|
||||||
return RecordId(record.mId, isDeleted);
|
return RecordId(record.mId, isDeleted);
|
||||||
}
|
}
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@ -213,6 +218,9 @@ namespace MWWorld
|
|||||||
T *ptr = &result.first->second;
|
T *ptr = &result.first->second;
|
||||||
if (result.second)
|
if (result.second)
|
||||||
mShared.push_back(ptr);
|
mShared.push_back(ptr);
|
||||||
|
|
||||||
|
mModPoint++;
|
||||||
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@ -222,6 +230,9 @@ namespace MWWorld
|
|||||||
T *ptr = &result.first->second;
|
T *ptr = &result.first->second;
|
||||||
if (result.second)
|
if (result.second)
|
||||||
mShared.push_back(ptr);
|
mShared.push_back(ptr);
|
||||||
|
|
||||||
|
mModPoint++;
|
||||||
|
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@ -242,6 +253,8 @@ namespace MWWorld
|
|||||||
++sharedIter;
|
++sharedIter;
|
||||||
}
|
}
|
||||||
mStatic.erase(it);
|
mStatic.erase(it);
|
||||||
|
|
||||||
|
mModPoint++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -259,6 +272,9 @@ namespace MWWorld
|
|||||||
for (auto it = mDynamic.begin(); it != mDynamic.end(); ++it) {
|
for (auto it = mDynamic.begin(); it != mDynamic.end(); ++it) {
|
||||||
mShared.push_back(&it->second);
|
mShared.push_back(&it->second);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mModPoint++;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@ -997,6 +1013,8 @@ namespace MWWorld
|
|||||||
// TODO: verify and document this inconsistent behaviour
|
// TODO: verify and document this inconsistent behaviour
|
||||||
// TODO: if we require this behaviour, maybe we should move it to the place that requires it
|
// TODO: if we require this behaviour, maybe we should move it to the place that requires it
|
||||||
std::sort(mShared.begin(), mShared.end(), [](const ESM::Dialogue* l, const ESM::Dialogue* r) -> bool { return l->mId < r->mId; });
|
std::sort(mShared.begin(), mShared.end(), [](const ESM::Dialogue* l, const ESM::Dialogue* r) -> bool { return l->mId < r->mId; });
|
||||||
|
|
||||||
|
mModPoint++;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
@ -1019,13 +1037,17 @@ namespace MWWorld
|
|||||||
dialogue.mId = found->second.mId;
|
dialogue.mId = found->second.mId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mModPoint++;
|
||||||
|
|
||||||
return RecordId(dialogue.mId, isDeleted);
|
return RecordId(dialogue.mId, isDeleted);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
bool Store<ESM::Dialogue>::eraseStatic(const std::string &id)
|
bool Store<ESM::Dialogue>::eraseStatic(const std::string &id)
|
||||||
{
|
{
|
||||||
mStatic.erase(id);
|
if (mStatic.erase(id))
|
||||||
|
mModPoint++;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -159,6 +159,8 @@ namespace MWWorld
|
|||||||
typedef std::unordered_map<std::string, T, Misc::StringUtils::CiHash, Misc::StringUtils::CiEqual> Dynamic;
|
typedef std::unordered_map<std::string, T, Misc::StringUtils::CiHash, Misc::StringUtils::CiEqual> Dynamic;
|
||||||
Dynamic mDynamic;
|
Dynamic mDynamic;
|
||||||
|
|
||||||
|
uint64_t mModPoint;
|
||||||
|
|
||||||
friend class ESMStore;
|
friend class ESMStore;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -203,6 +205,8 @@ namespace MWWorld
|
|||||||
RecordId load(ESM::ESMReader &esm) override;
|
RecordId load(ESM::ESMReader &esm) override;
|
||||||
void write(ESM::ESMWriter& writer, Loading::Listener& progress) const override;
|
void write(ESM::ESMWriter& writer, Loading::Listener& progress) const override;
|
||||||
RecordId read(ESM::ESMReader& reader, bool overrideOnly = false) override;
|
RecordId read(ESM::ESMReader& reader, bool overrideOnly = false) override;
|
||||||
|
|
||||||
|
uint64_t getModPoint() const { return mModPoint; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user