From c75e938c46a660253a6dd76c3545f82ce40f8eb9 Mon Sep 17 00:00:00 2001 From: elsid Date: Sat, 12 Feb 2022 12:00:35 +0100 Subject: [PATCH] Return string_view from Utf8Encoder functions To avoid redundant std::string constructions. --- components/esm3/esmreader.cpp | 2 +- components/esm3/esmwriter.cpp | 4 ++-- components/fontloader/fontloader.cpp | 21 ++++++++++++++------- components/to_utf8/to_utf8.cpp | 14 ++++++-------- components/to_utf8/to_utf8.hpp | 10 +++++++--- 5 files changed, 30 insertions(+), 21 deletions(-) diff --git a/components/esm3/esmreader.cpp b/components/esm3/esmreader.cpp index 667132c60f..165263d6e4 100644 --- a/components/esm3/esmreader.cpp +++ b/components/esm3/esmreader.cpp @@ -320,7 +320,7 @@ std::string ESMReader::getString(int size) // Convert to UTF8 and return if (mEncoder) - return mEncoder->getUtf8(std::string_view(ptr, size)); + return std::string(mEncoder->getUtf8(std::string_view(ptr, size))); return std::string (ptr, size); } diff --git a/components/esm3/esmwriter.cpp b/components/esm3/esmwriter.cpp index f65340f703..d0137c5131 100644 --- a/components/esm3/esmwriter.cpp +++ b/components/esm3/esmwriter.cpp @@ -193,9 +193,9 @@ namespace ESM else { // Convert to UTF8 and return - std::string string = mEncoder ? mEncoder->getLegacyEnc(data) : data; + const std::string_view string = mEncoder != nullptr ? 
mEncoder->getLegacyEnc(data) : data; - write(string.c_str(), string.size()); + write(string.data(), string.size()); } } diff --git a/components/fontloader/fontloader.cpp b/components/fontloader/fontloader.cpp index 76f554bec7..36174d0b7e 100644 --- a/components/fontloader/fontloader.cpp +++ b/components/fontloader/fontloader.cpp @@ -1,6 +1,8 @@ #include "fontloader.hpp" #include +#include +#include #include @@ -26,7 +28,7 @@ namespace { - unsigned long utf8ToUnicode(const std::string& utf8) + unsigned long utf8ToUnicode(std::string_view utf8) { size_t i = 0; unsigned long unicode; @@ -116,16 +118,21 @@ namespace } } - // getUtf8, aka the worst function ever written. - // This includes various hacks for dealing with Morrowind's .fnt files that are *mostly* + // getUnicode includes various hacks for dealing with Morrowind's .fnt files that are *mostly* // in the expected win12XX encoding, but also have randomly swapped characters sometimes. // Looks like the Morrowind developers found standard encodings too boring and threw in some twists for fun. 
- std::string getUtf8 (unsigned char c, ToUTF8::Utf8Encoder& encoder, ToUTF8::FromType encoding) + unsigned long getUnicode(unsigned char c, ToUTF8::Utf8Encoder& encoder, ToUTF8::FromType encoding) { if (encoding == ToUTF8::WINDOWS_1250) // Hack for polish font - return encoder.getUtf8(std::string(1, mapUtf8Char(c))); + { + const std::array<char, 2> str {static_cast<char>(mapUtf8Char(c)), '\0'}; + return utf8ToUnicode(encoder.getUtf8(std::string_view(str.data(), 1))); + } else - return encoder.getUtf8(std::string(1, c)); + { + const std::array<char, 2> str {static_cast<char>(c), '\0'}; + return utf8ToUnicode(encoder.getUtf8(std::string_view(str.data(), 1))); + } } [[noreturn]] void fail (Files::IStreamPtr file, const std::string& fileName, const std::string& message) @@ -355,7 +362,7 @@ namespace Gui float h = data[i].bottom_left.y*height - y1; ToUTF8::Utf8Encoder encoder(mEncoding); - unsigned long unicodeVal = utf8ToUnicode(getUtf8(i, encoder, mEncoding)); + unsigned long unicodeVal = getUnicode(i, encoder, mEncoding); MyGUI::xml::ElementPtr code = codes->createChild("Code"); code->addAttribute("index", unicodeVal); diff --git a/components/to_utf8/to_utf8.cpp b/components/to_utf8/to_utf8.cpp index 3c4421c605..7fd0e3cd88 100644 --- a/components/to_utf8/to_utf8.cpp +++ b/components/to_utf8/to_utf8.cpp @@ -77,7 +77,7 @@ Utf8Encoder::Utf8Encoder(const FromType sourceEncoding): } } -std::string Utf8Encoder::getUtf8(std::string_view input) +std::string_view Utf8Encoder::getUtf8(std::string_view input) { if (input.empty()) return input; @@ -100,7 +100,7 @@ std::string Utf8Encoder::getUtf8(std::string_view input) // If we're pure ascii, then don't bother converting anything. 
if(ascii) - return std::string(input.data(), outlen); + return std::string_view(input.data(), outlen); // Make sure the output is large enough resize(outlen); @@ -117,11 +117,10 @@ std::string Utf8Encoder::getUtf8(std::string_view input) assert(mOutput.size() > outlen); assert(mOutput[outlen] == 0); - // Return a string - return std::string(&mOutput[0], outlen); + return std::string_view(mOutput.data(), outlen); } -std::string Utf8Encoder::getLegacyEnc(std::string_view input) +std::string_view Utf8Encoder::getLegacyEnc(std::string_view input) { if (input.empty()) return input; @@ -144,7 +143,7 @@ std::string Utf8Encoder::getLegacyEnc(std::string_view input) // If we're pure ascii, then don't bother converting anything. if(ascii) - return std::string(input.data(), outlen); + return std::string_view(input.data(), outlen); // Make sure the output is large enough resize(outlen); @@ -161,8 +160,7 @@ std::string Utf8Encoder::getLegacyEnc(std::string_view input) assert(mOutput.size() > outlen); assert(mOutput[outlen] == 0); - // Return a string - return std::string(&mOutput[0], outlen); + return std::string_view(mOutput.data(), outlen); } // Make sure the output vector is large enough for 'size' bytes, diff --git a/components/to_utf8/to_utf8.hpp b/components/to_utf8/to_utf8.hpp index 07960ae3f9..0e9db01e1d 100644 --- a/components/to_utf8/to_utf8.hpp +++ b/components/to_utf8/to_utf8.hpp @@ -28,10 +28,14 @@ namespace ToUTF8 public: Utf8Encoder(FromType sourceEncoding); - // Convert to UTF8 from the previously given code page. - std::string getUtf8(std::string_view input); + /// Convert to UTF8 from the previously given code page. + /// Returns a view to an internal buffer that is invalidated by the next getUtf8 or getLegacyEnc call if the + /// input is not an ASCII-only string. Otherwise returns a view to the input. 
+ std::string_view getUtf8(std::string_view input); - std::string getLegacyEnc(std::string_view input); + /// Returns a view to an internal buffer that is invalidated by the next getUtf8 or getLegacyEnc call if the + /// input is not an ASCII-only string. Otherwise returns a view to the input. + std::string_view getLegacyEnc(std::string_view input); private: void resize(size_t size);