diff --git a/apps/esmtool/tes4.cpp b/apps/esmtool/tes4.cpp index 904040428d..8eaf1b3464 100644 --- a/apps/esmtool/tes4.cpp +++ b/apps/esmtool/tes4.cpp @@ -526,8 +526,7 @@ namespace EsmTool try { const ToUTF8::StatelessUtf8Encoder encoder(ToUTF8::calculateEncoding(info.encoding)); - ESM4::Reader reader(std::move(stream), info.filename); - reader.setEncoder(&encoder); + ESM4::Reader reader(std::move(stream), info.filename, nullptr, &encoder, true); const Params params(info); if (!params.mQuite) diff --git a/apps/openmw/mwworld/esmloader.cpp b/apps/openmw/mwworld/esmloader.cpp index 9190d53ea5..e586a4c204 100644 --- a/apps/openmw/mwworld/esmloader.cpp +++ b/apps/openmw/mwworld/esmloader.cpp @@ -64,9 +64,8 @@ namespace MWWorld } case ESM::Format::Tes4: { - ESM4::Reader readerESM4( - std::move(stream), filepath, MWBase::Environment::get().getResourceSystem()->getVFS()); - readerESM4.setEncoder(mReaders.getStatelessEncoder()); + ESM4::Reader readerESM4(std::move(stream), filepath, + MWBase::Environment::get().getResourceSystem()->getVFS(), mReaders.getStatelessEncoder()); readerESM4.setModIndex(index); readerESM4.updateModIndices(mNameToIndex); mStore.loadESM4(readerESM4); diff --git a/components/esm4/common.hpp b/components/esm4/common.hpp index 77c24f6121..61f18e650d 100644 --- a/components/esm4/common.hpp +++ b/components/esm4/common.hpp @@ -571,6 +571,7 @@ namespace ESM4 SUB_FLTR = fourCC("FLTR"), // TES5 SUB_QTGL = fourCC("QTGL"), // TES5 SUB_TWAT = fourCC("TWAT"), // TES5 + SUB_SPMV = fourCC("SPMV"), // TES5 SUB_XIBS = fourCC("XIBS"), // FO3 SUB_REPL = fourCC("REPL"), // FO3 SUB_BIPL = fourCC("BIPL"), // FO3 diff --git a/components/esm4/loadhdpt.cpp b/components/esm4/loadhdpt.cpp index f308a6a3a2..51f4b629f3 100644 --- a/components/esm4/loadhdpt.cpp +++ b/components/esm4/loadhdpt.cpp @@ -94,6 +94,7 @@ void ESM4::HeadPart::load(ESM4::Reader& reader) case ESM4::SUB_MODS: case ESM4::SUB_MODT: case ESM4::SUB_RNAM: + case ESM4::SUB_CNAM: { // std::cout << "HDPT " << ESM::printName(subHdr.typeId) << " skipping..." << std::endl; reader.skipSubRecordData(); diff --git a/components/esm4/loadrace.cpp b/components/esm4/loadrace.cpp index 04f3bd0fc2..1576743ed3 100644 --- a/components/esm4/loadrace.cpp +++ b/components/esm4/loadrace.cpp @@ -677,6 +677,8 @@ void ESM4::Race::load(ESM4::Reader& reader) case ESM4::SUB_SPED: case ESM4::SUB_SWMV: case ESM4::SUB_WKMV: + case ESM4::SUB_SPMV: + case ESM4::SUB_ATKR: // case ESM4::SUB_YNAM: // FO3 case ESM4::SUB_NAM2: // FO3 diff --git a/components/esm4/loadscrl.cpp b/components/esm4/loadscrl.cpp index 30cc8818fd..bc77d80d07 100644 --- a/components/esm4/loadscrl.cpp +++ b/components/esm4/loadscrl.cpp @@ -72,6 +72,7 @@ void ESM4::Scroll::load(ESM4::Reader& reader) case ESM4::SUB_MDOB: case ESM4::SUB_MODT: case ESM4::SUB_SPIT: + case ESM4::SUB_CIS2: { // std::cout << "SCRL " << ESM::printName(subHdr.typeId) << " skipping..." << std::endl; reader.skipSubRecordData(); diff --git a/components/esm4/reader.cpp b/components/esm4/reader.cpp index ff578b23a4..4a3569eeac 100644 --- a/components/esm4/reader.cpp +++ b/components/esm4/reader.cpp @@ -46,6 +46,7 @@ #include #include +#include #include #include #include @@ -56,6 +57,23 @@ namespace ESM4 { + namespace + { + std::u8string_view getStringsSuffix(LocalizedStringType type) + { + switch (type) + { + case LocalizedStringType::Strings: + return u8"_English.STRINGS"; + case LocalizedStringType::ILStrings: + return u8"_English.ILSTRINGS"; + case LocalizedStringType::DLStrings: + return u8"_English.DLSTRINGS"; + } + + throw std::logic_error("Unsupported LocalizedStringType: " + std::to_string(static_cast(type))); + } + } ReaderContext::ReaderContext() : modIndex(0) @@ -72,11 +90,13 @@ namespace ESM4 subRecordHeader.dataSize = 0; } - Reader::Reader(Files::IStreamPtr&& esmStream, const std::filesystem::path& filename, VFS::Manager const* vfs) + Reader::Reader(Files::IStreamPtr&& esmStream, const std::filesystem::path& filename, VFS::Manager const* vfs, + const ToUTF8::StatelessUtf8Encoder* encoder, bool ignoreMissingLocalizedStrings) : mVFS(vfs) - , mEncoder(nullptr) + , mEncoder(encoder) , mFileSize(0) , mStream(std::move(esmStream)) + , mIgnoreMissingLocalizedStrings(ignoreMissingLocalizedStrings) { // used by ESMReader only? mCtx.filename = filename; @@ -209,58 +229,136 @@ namespace ESM4 if ((mHeader.mFlags & Rec_ESM) == 0 || (mHeader.mFlags & Rec_Localized) == 0) return; - const auto filename = mCtx.filename.stem().filename().u8string(); + const std::u8string prefix = mCtx.filename.stem().filename().u8string(); - static const std::filesystem::path s("Strings"); - buildLStringIndex(s / (filename + u8"_English.STRINGS"), Type_Strings); - buildLStringIndex(s / (filename + u8"_English.ILSTRINGS"), Type_ILStrings); - buildLStringIndex(s / (filename + u8"_English.DLSTRINGS"), Type_DLStrings); + buildLStringIndex(LocalizedStringType::Strings, prefix); + buildLStringIndex(LocalizedStringType::ILStrings, prefix); + buildLStringIndex(LocalizedStringType::DLStrings, prefix); } - void Reader::buildLStringIndex(const std::filesystem::path& stringFile, LocalizedStringType stringType) + void Reader::buildLStringIndex(LocalizedStringType stringType, const std::u8string& prefix) { - std::uint32_t numEntries; - std::uint32_t dataSize; - std::uint32_t stringId; - LStringOffset sp; - sp.type = stringType; + static const std::filesystem::path strings("Strings"); + const std::u8string suffix(getStringsSuffix(stringType)); + std::filesystem::path path = strings / (prefix + suffix); - // TODO: possibly check if the resource exists? - Files::IStreamPtr filestream = mVFS - ? mVFS->get(stringFile.string()) - : Files::openConstrainedFileStream(mCtx.filename.parent_path() / stringFile); - - filestream->seekg(0, std::ios::end); - std::size_t fileSize = filestream->tellg(); - filestream->seekg(0, std::ios::beg); - - std::istream* stream = filestream.get(); - switch (stringType) + if (mVFS != nullptr) { - case Type_Strings: - mStrings = std::move(filestream); - break; - case Type_ILStrings: - mILStrings = std::move(filestream); - break; - case Type_DLStrings: - mDLStrings = std::move(filestream); - break; - default: - throw std::runtime_error("ESM4::Reader::unknown localised string type"); + const std::string vfsPath = Files::pathToUnicodeString(path); + + if (mIgnoreMissingLocalizedStrings && !mVFS->exists(vfsPath)) + { + Log(Debug::Warning) << "Ignore missing VFS strings file: " << vfsPath; + return; + } + + const Files::IStreamPtr stream = mVFS->get(vfsPath); + buildLStringIndex(stringType, *stream); + return; } - stream->read((char*)&numEntries, sizeof(numEntries)); - stream->read((char*)&dataSize, sizeof(dataSize)); - std::size_t dataStart = fileSize - dataSize; - for (unsigned int i = 0; i < numEntries; ++i) + const std::filesystem::path fsPath = mCtx.filename.parent_path() / path; + + if (mIgnoreMissingLocalizedStrings && !std::filesystem::exists(fsPath)) { - stream->read((char*)&stringId, sizeof(stringId)); - stream->read((char*)&sp.offset, sizeof(sp.offset)); - sp.offset += (std::uint32_t)dataStart; - mLStringIndex[FormId::fromUint32(stringId)] = sp; + Log(Debug::Warning) << "Ignore missing strings file: " << fsPath; + return; } - // assert (dataStart - stream->tell() == 0 && "String file start of data section mismatch"); + + const Files::IStreamPtr stream = Files::openConstrainedFileStream(fsPath); + buildLStringIndex(stringType, *stream); + } + + void Reader::buildLStringIndex(LocalizedStringType stringType, std::istream& stream) + { + stream.seekg(0, std::ios::end); + const std::istream::pos_type fileSize = stream.tellg(); + stream.seekg(0, std::ios::beg); + + std::uint32_t numEntries = 0; + stream.read(reinterpret_cast(&numEntries), sizeof(numEntries)); + + std::uint32_t dataSize = 0; + stream.read(reinterpret_cast(&dataSize), sizeof(dataSize)); + + const std::istream::pos_type dataStart = fileSize - static_cast(dataSize); + + struct LocalizedString + { + std::uint32_t mOffset = 0; + std::uint32_t mStringId = 0; + }; + + std::vector strings; + strings.reserve(numEntries); + + for (std::uint32_t i = 0; i < numEntries; ++i) + { + LocalizedString string; + + stream.read(reinterpret_cast(&string.mStringId), sizeof(string.mStringId)); + stream.read(reinterpret_cast(&string.mOffset), sizeof(string.mOffset)); + + strings.push_back(string); + } + + std::sort(strings.begin(), strings.end(), + [](const LocalizedString& l, const LocalizedString& r) { return l.mOffset < r.mOffset; }); + + std::uint32_t lastOffset = 0; + std::string_view lastValue; + + for (const LocalizedString& string : strings) + { + if (string.mOffset == lastOffset) + { + mLStringIndex.emplace(FormId::fromUint32(string.mStringId), lastValue); + continue; + } + + const std::istream::pos_type offset = string.mOffset + dataStart; + const std::istream::pos_type pos = stream.tellg(); + if (pos != offset) + { + char buffer[4096]; + if (pos < offset && offset - pos < static_cast(sizeof(buffer))) + stream.read(buffer, offset - pos); + else + stream.seekg(offset); + } + + const auto it + = mLStringIndex.emplace(FormId::fromUint32(string.mStringId), readLocalizedString(stringType, stream)) + .first; + lastOffset = string.mOffset; + lastValue = it->second; + } + } + + std::string Reader::readLocalizedString(LocalizedStringType type, std::istream& stream) + { + if (type == LocalizedStringType::Strings) + { + std::string data; + + while (true) + { + char ch = 0; + stream.read(&ch, sizeof(ch)); + if (ch == 0) + break; + data.push_back(ch); + } + + return data; + } + + std::uint32_t size = 0; + stream.read(reinterpret_cast(&size), sizeof(size)); + + std::string result; + getStringImpl(result, size, stream, true); // expect null terminated string + return result; } void Reader::getLocalizedString(std::string& str) @@ -277,48 +375,17 @@ namespace ESM4 // FIXME: very messy and probably slow/inefficient void Reader::getLocalizedStringImpl(const FormId stringId, std::string& str) { - const std::map::const_iterator it = mLStringIndex.find(stringId); + const auto it = mLStringIndex.find(stringId); - if (it != mLStringIndex.end()) + if (it == mLStringIndex.end()) { - std::istream* filestream = nullptr; - - switch (it->second.type) - { - case Type_Strings: // no string size provided - { - filestream = mStrings.get(); - filestream->seekg(it->second.offset); - - char ch; - std::vector data; - do - { - filestream->read(&ch, sizeof(ch)); - data.push_back(ch); - } while (ch != 0); - - str = std::string(data.data()); - return; - } - case Type_ILStrings: - filestream = mILStrings.get(); - break; - case Type_DLStrings: - filestream = mDLStrings.get(); - break; - default: - throw std::runtime_error("ESM4::Reader::getLocalizedString unknown string type"); - } - - // get ILStrings or DLStrings (they provide string size) - filestream->seekg(it->second.offset); - std::uint32_t size = 0; - filestream->read((char*)&size, sizeof(size)); - getStringImpl(str, size, *filestream, mEncoder, true); // expect null terminated string + if (mIgnoreMissingLocalizedStrings) + return; + throw std::runtime_error( + "ESM4::Reader::getLocalizedString localized string not found for " + formIdToString(stringId)); } - else - throw std::runtime_error("ESM4::Reader::getLocalizedString localized string not found"); + + str = it->second; } bool Reader::getRecordHeader() @@ -659,19 +726,18 @@ namespace ESM4 throw std::runtime_error(ss.str()); } - bool Reader::getStringImpl(std::string& str, std::size_t size, std::istream& stream, - const ToUTF8::StatelessUtf8Encoder* encoder, bool hasNull) + bool Reader::getStringImpl(std::string& str, std::size_t size, std::istream& stream, bool hasNull) { std::size_t newSize = size; - if (encoder) + if (mEncoder != nullptr) { std::string input(size, '\0'); stream.read(input.data(), size); if (stream.gcount() == static_cast(size)) { const std::string_view result - = encoder->getUtf8(input, ToUTF8::BufferAllocationPolicy::FitToRequiredSize, str); + = mEncoder->getUtf8(input, ToUTF8::BufferAllocationPolicy::FitToRequiredSize, str); if (str.empty() && !result.empty()) { str = std::move(input); diff --git a/components/esm4/reader.hpp b/components/esm4/reader.hpp index 86a52115e4..6b712ae9d8 100644 --- a/components/esm4/reader.hpp +++ b/components/esm4/reader.hpp @@ -131,6 +131,13 @@ namespace ESM4 ReaderContext(); }; + enum class LocalizedStringType + { + Strings, + ILStrings, + DLStrings, + }; + class Reader { VFS::Manager const* mVFS; @@ -149,24 +156,17 @@ namespace ESM4 Files::IStreamPtr mILStrings; Files::IStreamPtr mDLStrings; - enum LocalizedStringType - { - Type_Strings = 0, - Type_ILStrings = 1, - Type_DLStrings = 2 - }; - - struct LStringOffset - { - LocalizedStringType type; - std::uint32_t offset; - }; - - std::map mLStringIndex; + std::unordered_map mLStringIndex; std::vector* mGlobalReaderList = nullptr; - void buildLStringIndex(const std::filesystem::path& stringFile, LocalizedStringType stringType); + bool mIgnoreMissingLocalizedStrings = false; + + void buildLStringIndex(LocalizedStringType stringType, const std::u8string& prefix); + + void buildLStringIndex(LocalizedStringType stringType, std::istream& stream); + + std::string readLocalizedString(LocalizedStringType type, std::istream& stream); inline bool hasLocalizedStrings() const { return (mHeader.mFlags & Rec_Localized) != 0; } @@ -185,11 +185,12 @@ namespace ESM4 Reader() = default; - bool getStringImpl(std::string& str, std::size_t size, std::istream& stream, - const ToUTF8::StatelessUtf8Encoder* encoder, bool hasNull = false); + bool getStringImpl(std::string& str, std::size_t size, std::istream& stream, bool hasNull = false); public: - Reader(Files::IStreamPtr&& esmStream, const std::filesystem::path& filename, VFS::Manager const* vfs = nullptr); + Reader(Files::IStreamPtr&& esmStream, const std::filesystem::path& filename, VFS::Manager const* vfs, + const ToUTF8::StatelessUtf8Encoder* encoder, bool ignoreMissingLocalizedStrings = false); + ~Reader(); void open(const std::filesystem::path& filename); @@ -198,8 +199,6 @@ namespace ESM4 inline bool isEsm4() const { return true; } - inline void setEncoder(const ToUTF8::StatelessUtf8Encoder* encoder) { mEncoder = encoder; } - const std::vector& getGameFiles() const { return mHeader.mMaster; } inline int getRecordCount() const { return mHeader.mData.records; } @@ -348,14 +347,8 @@ namespace ESM4 void adjustGRUPFormId(); // Note: uses the string size from the subrecord header rather than checking null termination - bool getZString(std::string& str) - { - return getStringImpl(str, mCtx.subRecordHeader.dataSize, *mStream, mEncoder, true); - } - bool getString(std::string& str) - { - return getStringImpl(str, mCtx.subRecordHeader.dataSize, *mStream, mEncoder); - } + bool getZString(std::string& str) { return getStringImpl(str, mCtx.subRecordHeader.dataSize, *mStream, true); } + bool getString(std::string& str) { return getStringImpl(str, mCtx.subRecordHeader.dataSize, *mStream); } bool getZeroTerminatedStringArray(std::vector& values);