mirror of
https://gitlab.com/OpenMW/openmw.git
synced 2025-02-03 17:54:06 +00:00
Support UTF-8 by StringRefId::toDebugString
This commit is contained in:
parent
8e3e351015
commit
78b3f7288a
@ -287,7 +287,11 @@ namespace ESM
|
||||
{ RefId(), "Empty{}" },
|
||||
{ RefId::stringRefId("foo"), "\"foo\"" },
|
||||
{ RefId::stringRefId("BAR"), "\"BAR\"" },
|
||||
{ RefId::stringRefId(std::string({ 'a', 0, -1, '\n', '\t' })), "\"a\\x0\\xFF\\xA\\x9\"" },
|
||||
{ RefId::stringRefId(std::string({ 'a', 0, -1, '\n', '\t' })), "\"a\\x0\\xff\\xa\\x9\"" },
|
||||
{ RefId::stringRefId("Логово дракона"), "\"Логово дракона\"" },
|
||||
{ RefId::stringRefId("\xd0\x9b"), "\"Л\"" },
|
||||
{ RefId::stringRefId("\xff\x9b"), "\"\\xff\\x9b\"" },
|
||||
{ RefId::stringRefId("\xd0\xd0"), "\"\\xd0\\xd0\"" },
|
||||
{ RefId::formIdRefId({ .mIndex = 42, .mContentFile = 0 }), "FormId:0x2a" },
|
||||
{ RefId::formIdRefId({ .mIndex = 0xffffff, .mContentFile = std::numeric_limits<std::int32_t>::min() }),
|
||||
"FormId:0xff80000000ffffff" },
|
||||
|
@ -1,13 +1,17 @@
|
||||
#include "stringrefid.hpp"
|
||||
#include "serializerefid.hpp"
|
||||
|
||||
#include <charconv>
|
||||
#include <iomanip>
|
||||
#include <mutex>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <system_error>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "components/misc/guarded.hpp"
|
||||
#include "components/misc/strings/algorithm.hpp"
|
||||
#include "components/misc/utf8stream.hpp"
|
||||
|
||||
namespace ESM
|
||||
{
|
||||
@ -26,6 +30,18 @@ namespace ESM
|
||||
it = locked->emplace(id).first;
|
||||
return &*it;
|
||||
}
|
||||
|
||||
void addHex(unsigned char value, std::string& result)
|
||||
{
|
||||
const std::size_t size = 2 + getHexIntegralSize(value);
|
||||
const std::size_t shift = result.size();
|
||||
result.resize(shift + size);
|
||||
result[shift] = '\\';
|
||||
result[shift + 1] = 'x';
|
||||
const auto [end, ec] = std::to_chars(result.data() + shift + 2, result.data() + result.size(), value, 16);
|
||||
if (ec != std::errc())
|
||||
throw std::system_error(std::make_error_code(ec));
|
||||
}
|
||||
}
|
||||
|
||||
StringRefId::StringRefId()
|
||||
@ -60,20 +76,43 @@ namespace ESM
|
||||
|
||||
std::ostream& operator<<(std::ostream& stream, StringRefId value)
|
||||
{
|
||||
stream << '"';
|
||||
for (char c : *value.mValue)
|
||||
if (std::isprint(c) && c != '\t' && c != '\n' && c != '\r')
|
||||
stream << c;
|
||||
else
|
||||
stream << "\\x" << std::hex << std::uppercase << static_cast<unsigned>(static_cast<unsigned char>(c));
|
||||
return stream << '"';
|
||||
return stream << value.toDebugString();
|
||||
}
|
||||
|
||||
std::string StringRefId::toDebugString() const
|
||||
{
|
||||
std::ostringstream stream;
|
||||
stream << *this;
|
||||
return stream.str();
|
||||
std::string result;
|
||||
result.reserve(2 + mValue->size());
|
||||
result.push_back('"');
|
||||
const unsigned char* ptr = reinterpret_cast<const unsigned char*>(mValue->data());
|
||||
const unsigned char* const end = reinterpret_cast<const unsigned char*>(mValue->data() + mValue->size());
|
||||
while (ptr != end)
|
||||
{
|
||||
if (Utf8Stream::isAscii(*ptr))
|
||||
{
|
||||
if (std::isprint(*ptr) && *ptr != '\t' && *ptr != '\n' && *ptr != '\r')
|
||||
result.push_back(*ptr);
|
||||
else
|
||||
addHex(*ptr, result);
|
||||
++ptr;
|
||||
continue;
|
||||
}
|
||||
const auto [octets, first] = Utf8Stream::getOctetCount(*ptr);
|
||||
const auto [chr, next] = Utf8Stream::decode(ptr + 1, end, first, octets);
|
||||
if (chr == Utf8Stream::sBadChar())
|
||||
{
|
||||
while (ptr != std::min(end, ptr + octets))
|
||||
{
|
||||
addHex(*ptr, result);
|
||||
++ptr;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
result.append(ptr, next);
|
||||
ptr = next;
|
||||
}
|
||||
result.push_back('"');
|
||||
return result;
|
||||
}
|
||||
|
||||
bool StringRefId::startsWith(std::string_view prefix) const
|
||||
|
@ -1,6 +1,7 @@
|
||||
#ifndef MISC_UTF8ITER_HPP
|
||||
#define MISC_UTF8ITER_HPP
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
@ -63,9 +64,11 @@ public:
|
||||
return val;
|
||||
}
|
||||
|
||||
// True when `value` has its high bit clear, i.e. it is below 0x80 and so is
// a single-byte (7-bit ASCII) code unit.
static bool isAscii(unsigned char value)
{
    return value < 0x80;
}
|
||||
|
||||
static std::pair<UnicodeChar, Point> decode(Point cur, Point end)
|
||||
{
|
||||
if ((*cur & 0x80) == 0)
|
||||
if (isAscii(*cur))
|
||||
{
|
||||
UnicodeChar chr = *cur++;
|
||||
|
||||
@ -75,8 +78,13 @@ public:
|
||||
int octets;
|
||||
UnicodeChar chr;
|
||||
|
||||
std::tie(octets, chr) = octet_count(*cur++);
|
||||
std::tie(octets, chr) = getOctetCount(*cur++);
|
||||
|
||||
return decode(cur, end, chr, octets);
|
||||
}
|
||||
|
||||
static std::pair<UnicodeChar, Point> decode(Point cur, Point end, UnicodeChar chr, std::size_t octets)
|
||||
{
|
||||
if (octets > 5)
|
||||
return std::make_pair(sBadChar(), cur);
|
||||
|
||||
@ -161,10 +169,9 @@ public:
|
||||
return out;
|
||||
}
|
||||
|
||||
private:
|
||||
static std::pair<int, UnicodeChar> octet_count(unsigned char octet)
|
||||
static std::pair<std::size_t, UnicodeChar> getOctetCount(unsigned char octet)
|
||||
{
|
||||
int octets;
|
||||
std::size_t octets;
|
||||
|
||||
unsigned char mark = 0xC0;
|
||||
unsigned char mask = 0xE0;
|
||||
@ -181,6 +188,7 @@ private:
|
||||
return std::make_pair(octets, octet & ~mask);
|
||||
}
|
||||
|
||||
private:
|
||||
void next() { std::tie(val, nxt) = decode(nxt, end); }
|
||||
|
||||
Point cur;
|
||||
|
Loading…
x
Reference in New Issue
Block a user