1
0
mirror of https://gitlab.com/OpenMW/openmw.git synced 2025-02-03 17:54:06 +00:00

Support UTF-8 by StringRefId::toDebugString

This commit is contained in:
elsid 2023-06-01 11:35:34 +02:00
parent 8e3e351015
commit 78b3f7288a
No known key found for this signature in database
GPG Key ID: 4DE04C198CBA7625
3 changed files with 67 additions and 16 deletions

View File

@ -287,7 +287,11 @@ namespace ESM
{ RefId(), "Empty{}" },
{ RefId::stringRefId("foo"), "\"foo\"" },
{ RefId::stringRefId("BAR"), "\"BAR\"" },
{ RefId::stringRefId(std::string({ 'a', 0, -1, '\n', '\t' })), "\"a\\x0\\xFF\\xA\\x9\"" },
{ RefId::stringRefId(std::string({ 'a', 0, -1, '\n', '\t' })), "\"a\\x0\\xff\\xa\\x9\"" },
{ RefId::stringRefId("Логово дракона"), "\"Логово дракона\"" },
{ RefId::stringRefId("\xd0\x9b"), "\"Л\"" },
{ RefId::stringRefId("\xff\x9b"), "\"\\xff\\x9b\"" },
{ RefId::stringRefId("\xd0\xd0"), "\"\\xd0\\xd0\"" },
{ RefId::formIdRefId({ .mIndex = 42, .mContentFile = 0 }), "FormId:0x2a" },
{ RefId::formIdRefId({ .mIndex = 0xffffff, .mContentFile = std::numeric_limits<std::int32_t>::min() }),
"FormId:0xff80000000ffffff" },

View File

@ -1,13 +1,17 @@
#include "stringrefid.hpp"
#include "serializerefid.hpp"
#include <charconv>
#include <iomanip>
#include <mutex>
#include <ostream>
#include <sstream>
#include <system_error>
#include <unordered_set>
#include "components/misc/guarded.hpp"
#include "components/misc/strings/algorithm.hpp"
#include "components/misc/utf8stream.hpp"
namespace ESM
{
@ -26,6 +30,18 @@ namespace ESM
it = locked->emplace(id).first;
return &*it;
}
/// Appends the escape sequence "\x<hex>" for a single byte to the end of result.
/// The digits are produced by std::to_chars in base 16, so they are lowercase
/// and carry no leading zeros.
/// NOTE(review): presumably getHexIntegralSize returns the exact number of hex
/// digits std::to_chars will emit for value - confirm in serializerefid.hpp.
/// @throws std::system_error if std::to_chars reports a failure.
void addHex(unsigned char value, std::string& result)
{
    const std::size_t offset = result.size();
    // Grow once: two characters for the "\x" prefix plus room for the digits.
    result.resize(offset + 2 + getHexIntegralSize(value));
    result[offset] = '\\';
    result[offset + 1] = 'x';
    char* const digitsBegin = result.data() + offset + 2;
    char* const digitsEnd = result.data() + result.size();
    const std::to_chars_result status = std::to_chars(digitsBegin, digitsEnd, value, 16);
    if (status.ec != std::errc())
        throw std::system_error(std::make_error_code(status.ec));
}
}
StringRefId::StringRefId()
@ -60,20 +76,43 @@ namespace ESM
/// Writes the debug representation of value to stream.
/// Delegates to StringRefId::toDebugString() so both entry points always
/// produce the same text.
std::ostream& operator<<(std::ostream& stream, StringRefId value)
{
    return stream << value.toDebugString();
}

/// Builds a double-quoted, human-readable representation of the stored string.
///  - Printable ASCII characters are copied unchanged.
///  - Other ASCII bytes (control characters, including tab, line feed and
///    carriage return) are written as lowercase "\x.." escapes.
///  - Byte sequences that Utf8Stream::decode accepts are copied verbatim.
///  - A sequence that fails to decode is escaped byte by byte, limited to the
///    length announced by its lead byte and to the end of the string.
/// @return the quoted debug string.
std::string StringRefId::toDebugString() const
{
    std::string result;
    result.reserve(2 + mValue->size());
    result.push_back('"');
    const unsigned char* ptr = reinterpret_cast<const unsigned char*>(mValue->data());
    const unsigned char* const end = ptr + mValue->size();
    while (ptr != end)
    {
        if (Utf8Stream::isAscii(*ptr))
        {
            if (std::isprint(*ptr) && *ptr != '\t' && *ptr != '\n' && *ptr != '\r')
                result.push_back(*ptr);
            else
                addHex(*ptr, result);
            ++ptr;
            continue;
        }
        const auto [octets, first] = Utf8Stream::getOctetCount(*ptr);
        const auto [chr, next] = Utf8Stream::decode(ptr + 1, end, first, octets);
        if (chr == Utf8Stream::sBadChar())
        {
            // Fix the stop position before advancing. Recomputing it from the
            // moving ptr would shift the bound along with the cursor and escape
            // everything up to the end of the string. Clamping the count also
            // avoids forming a pointer beyond the buffer, and the lower bound of
            // one byte guarantees forward progress.
            const std::size_t remaining = static_cast<std::size_t>(end - ptr);
            const std::size_t count = std::max<std::size_t>(1, std::min<std::size_t>(octets, remaining));
            const unsigned char* const stop = ptr + count;
            for (; ptr != stop; ++ptr)
                addHex(*ptr, result);
            continue;
        }
        // Valid multi-byte sequence: keep the original bytes.
        result.append(ptr, next);
        ptr = next;
    }
    result.push_back('"');
    return result;
}
bool StringRefId::startsWith(std::string_view prefix) const

View File

@ -1,6 +1,7 @@
#ifndef MISC_UTF8ITER_HPP
#define MISC_UTF8ITER_HPP
#include <cstdint>
#include <cstring>
#include <string>
#include <string_view>
@ -63,9 +64,11 @@ public:
return val;
}
/// Returns true when the high bit of value is clear, i.e. value lies in the
/// 7-bit range 0x00..0x7F.
static bool isAscii(unsigned char value)
{
    return value < 0x80;
}
static std::pair<UnicodeChar, Point> decode(Point cur, Point end)
{
if ((*cur & 0x80) == 0)
if (isAscii(*cur))
{
UnicodeChar chr = *cur++;
@ -75,8 +78,13 @@ public:
int octets;
UnicodeChar chr;
std::tie(octets, chr) = octet_count(*cur++);
std::tie(octets, chr) = getOctetCount(*cur++);
return decode(cur, end, chr, octets);
}
static std::pair<UnicodeChar, Point> decode(Point cur, Point end, UnicodeChar chr, std::size_t octets)
{
if (octets > 5)
return std::make_pair(sBadChar(), cur);
@ -161,10 +169,9 @@ public:
return out;
}
private:
static std::pair<int, UnicodeChar> octet_count(unsigned char octet)
static std::pair<std::size_t, UnicodeChar> getOctetCount(unsigned char octet)
{
int octets;
std::size_t octets;
unsigned char mark = 0xC0;
unsigned char mask = 0xE0;
@ -181,6 +188,7 @@ private:
return std::make_pair(octets, octet & ~mask);
}
private:
void next() { std::tie(val, nxt) = decode(nxt, end); }
Point cur;