Move the 7-Zip UTF-16 -> UTF-8 filename conversion into libretro-common
as utf16_conv_utf8() and make ConvertUtf16toCharString() return bool.

Review notes on this copy of the patch:
 - ConvertUtf16toCharString() keeps res strictly boolean. Storing
   SZ_ERROR_MEM in a bool read back as true, and testing res == SZ_OK
   (SZ_OK is 0) copied the name only when the conversion had failed.
 - The copied name is always NUL-terminated.
 - kUtf8Limits is a read-only table and is now const.
 - Hunk headers in file_ops.c and encoding_utf.c are adjusted for the
   added lines; the index lines are left as found.
 - Line breaks and indentation were rebuilt from a flattened copy.
   Operands of the context #include lines were missing and are left as
   found; operands of the three added #include lines are inferred and
   should be confirmed against the tree.

diff --git a/file_ops.c b/file_ops.c
index 4252bc538b..39b38594f6 100644
--- a/file_ops.c
+++ b/file_ops.c
@@ -35,6 +35,7 @@
 #ifdef HAVE_COMPRESSION
 #include
 #endif
+#include <encodings/utf.h>
 
 #include "file_ops.h"
 
@@ -56,132 +57,44 @@ static int Buf_EnsureSize(CBuf *dest, size_t size)
    return Buf_Create(dest, size, &g_Alloc);
 }
 
-#ifndef _WIN32
-
-static uint8_t kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
-static Bool Utf16_To_Utf8(uint8_t *dest, size_t *destLen,
-      const uint16_t *src, size_t srcLen)
-{
-   size_t destPos = 0;
-   size_t srcPos = 0;
-
-   for (;;)
-   {
-      unsigned numAdds;
-      uint32_t value;
-
-      if (srcPos == srcLen)
-      {
-         *destLen = destPos;
-         return True;
-      }
-      value = src[srcPos++];
-      if (value < 0x80)
-      {
-         if (dest)
-            dest[destPos] = (char)value;
-         destPos++;
-         continue;
-      }
-      if (value >= 0xD800 && value < 0xE000)
-      {
-         uint32_t c2;
-
-         if (value >= 0xDC00 || srcPos == srcLen)
-            break;
-         c2 = src[srcPos++];
-         if (c2 < 0xDC00 || c2 >= 0xE000)
-            break;
-         value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
-      }
-      for (numAdds = 1; numAdds < 5; numAdds++)
-         if (value < (((uint32_t)1) << (numAdds * 5 + 6)))
-            break;
-      if (dest)
-         dest[destPos] = (char)(kUtf8Limits[numAdds - 1]
-               + (value >> (6 * numAdds)));
-      destPos++;
-      do
-      {
-         numAdds--;
-         if (dest)
-            dest[destPos] = (char)(0x80
-                  + ((value >> (6 * numAdds)) & 0x3F));
-         destPos++;
-      }while (numAdds != 0);
-   }
-   *destLen = destPos;
-   return False;
-}
-
-static SRes Utf16_To_Utf8Buf(CBuf *dest,
-      const uint16_t *src, size_t srcLen)
-{
-   Bool res;
-   size_t destLen = 0;
-
-   Utf16_To_Utf8(NULL, &destLen, src, srcLen);
-   destLen += 1;
-
-   if (!Buf_EnsureSize(dest, destLen))
-      return SZ_ERROR_MEM;
-
-   res = Utf16_To_Utf8(dest->data, &destLen, src, srcLen);
-   dest->data[destLen] = 0;
-
-   return res ? SZ_OK : SZ_ERROR_FAIL;
-}
-#endif
-
-static SRes Utf16_To_Char(CBuf *buf, const uint16_t *s, int fileMode)
+/* Converts the NUL-terminated UTF-16 string s to UTF-8 and copies it
+ * into outstring, which must hold at least PATH_MAX_LENGTH bytes; the
+ * copy is truncated if needed and always NUL-terminated.
+ *
+ * Returns true on success. Returns false, leaving outstring untouched,
+ * if the scratch buffer cannot be allocated or utf16_conv_utf8()
+ * rejects the input (e.g. an unpaired surrogate). */
+static bool ConvertUtf16toCharString(const uint16_t *s, char *outstring)
 {
+   CBuf buf;
+   bool res;
+   size_t out_chars = 0;
    int len = 0;
 
+   Buf_Init(&buf);
+
    for (len = 0; s[len] != '\0'; len++);
 
-#ifdef _WIN32
+   utf16_conv_utf8(NULL, &out_chars, s, len);
+   out_chars += 1;
+
+   if (!Buf_EnsureSize(&buf, out_chars))
    {
-      int size = len * 3 + 100;
-      if (!Buf_EnsureSize(buf, size))
-         return SZ_ERROR_MEM;
-      {
-         char defaultChar = '_';
-         BOOL defUsed;
-         int numChars = WideCharToMultiByte(fileMode ?
-               (
-#ifdef UNDER_CE
-               CP_ACP
-#else
-               AreFileApisANSI() ? CP_ACP : CP_OEMCP
-#endif
-               ) : CP_OEMCP,
-               0, (LPCWSTR)s, len, (char *)buf->data,
-               size, &defaultChar, &defUsed);
-         if (numChars == 0 || numChars >= size)
-            return SZ_ERROR_FAIL;
-         buf->data[numChars] = 0;
-         return SZ_OK;
-      }
+      /* res is a bool: an SRes error code here would read as true. */
+      res = false;
+      goto end;
    }
-#else
-   (void)fileMode;
-   return Utf16_To_Utf8Buf(buf, s, len);
-#endif
-}
-
-static SRes ConvertUtf16toCharString(const uint16_t *s, char *outstring)
-{
-   CBuf buf;
-   SRes res;
-
-   Buf_Init(&buf);
 
-   res = Utf16_To_Char(&buf, s, 0);
+   res = utf16_conv_utf8(buf.data, &out_chars, s, len);
+   buf.data[out_chars] = 0;
 
-   if (res == SZ_OK)
-      strncpy(outstring, (const char*)buf.data, PATH_MAX_LENGTH);
+   if (res)
+   {
+      strncpy(outstring, (const char*)buf.data, PATH_MAX_LENGTH - 1);
+      outstring[PATH_MAX_LENGTH - 1] = '\0';
+   }
 
+end:
    Buf_Free(&buf, &g_Alloc);
    return res;
 }
@@ -267,7 +180,7 @@ static int read_7zip_file(
 
          }
          SzArEx_GetFileNameUtf16(&db, i, temp);
-         res = ConvertUtf16toCharString(temp,infile);
+         res = ConvertUtf16toCharString(temp,infile) ? SZ_OK : SZ_ERROR_FAIL;
 
          if (!strcmp(infile, relative_path))
          {
@@ -418,7 +331,7 @@ static struct string_list *compressed_7zip_file_list_new(
             }
          }
          SzArEx_GetFileNameUtf16(&db, i, temp);
-         res = ConvertUtf16toCharString(temp, infile);
+         res = ConvertUtf16toCharString(temp, infile) ? SZ_OK : SZ_ERROR_FAIL;
          file_ext = path_get_extension(infile);
 
          if (string_list_find_elem_prefix(ext_list, ".", file_ext))
diff --git a/libretro-common/encodings/encoding_utf.c b/libretro-common/encodings/encoding_utf.c
index 4fe5b3d3d0..ffff6319e7 100644
--- a/libretro-common/encodings/encoding_utf.c
+++ b/libretro-common/encodings/encoding_utf.c
@@ -23,6 +23,7 @@
 
 #include
 #include
+#include <boolean.h>
 #include
 
 static INLINE unsigned leading_ones(uint8_t c)
@@ -73,3 +74,76 @@ size_t utf8_conv_utf32(uint32_t *out, size_t out_chars,
 
    return ret;
 }
+
+/* Converts in_size UTF-16 code units from in to UTF-8.
+ *
+ * If out is NULL nothing is written and only the byte count is
+ * computed, so a caller can size a buffer with a first call and fill
+ * it with a second. No NUL terminator is written and writes to out are
+ * not bounds-checked.
+ *
+ * *out_chars receives the number of bytes produced. Returns true when
+ * all input was consumed; returns false at the first unpaired
+ * surrogate. */
+bool utf16_conv_utf8(uint8_t *out, size_t *out_chars,
+     const uint16_t *in, size_t in_size)
+{
+   static const uint8_t kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+   size_t out_pos = 0;
+   size_t in_pos  = 0;
+
+   for (;;)
+   {
+      unsigned numAdds;
+      uint32_t value;
+
+      if (in_pos == in_size)
+      {
+         *out_chars = out_pos;
+         return true;
+      }
+
+      value = in[in_pos++];
+
+      if (value < 0x80)
+      {
+         if (out)
+            out[out_pos] = (char)value;
+         out_pos++;
+         continue;
+      }
+
+      if (value >= 0xD800 && value < 0xE000)
+      {
+         uint32_t c2;
+
+         if (value >= 0xDC00 || in_pos == in_size)
+            break;
+         c2 = in[in_pos++];
+         if (c2 < 0xDC00 || c2 >= 0xE000)
+            break;
+         value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
+      }
+
+      for (numAdds = 1; numAdds < 5; numAdds++)
+         if (value < (((uint32_t)1) << (numAdds * 5 + 6)))
+            break;
+
+      if (out)
+         out[out_pos] = (char)(kUtf8Limits[numAdds - 1]
+               + (value >> (6 * numAdds)));
+      out_pos++;
+
+      do
+      {
+         numAdds--;
+         if (out)
+            out[out_pos] = (char)(0x80
+                  + ((value >> (6 * numAdds)) & 0x3F));
+         out_pos++;
+      }while (numAdds != 0);
+   }
+
+   *out_chars = out_pos;
+   return false;
+}
diff --git a/libretro-common/include/encodings/utf.h b/libretro-common/include/encodings/utf.h
index ac49478ed1..5a1517bb3f 100644
--- a/libretro-common/include/encodings/utf.h
+++ b/libretro-common/include/encodings/utf.h
@@ -26,6 +26,8 @@
 #include
 #include
 
+#include <boolean.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -33,6 +35,9 @@ extern "C" {
 size_t utf8_conv_utf32(uint32_t *out, size_t out_chars,
       const char *in, size_t in_size);
 
+bool utf16_conv_utf8(uint8_t *out, size_t *out_chars,
+      const uint16_t *in, size_t in_size);
+
 #ifdef __cplusplus
 }
 #endif