Create utf16_conv_utf8 - recycled code - should probably be

cleaned up
This commit is contained in:
twinaphex 2015-11-16 07:55:09 +01:00
parent d69aa24f28
commit 6f50187b56
3 changed files with 88 additions and 116 deletions

View File

@ -35,6 +35,7 @@
#ifdef HAVE_COMPRESSION
#include <file/file_extract.h>
#endif
#include <encodings/utf.h>
#include "file_ops.h"
@ -56,132 +57,33 @@ static int Buf_EnsureSize(CBuf *dest, size_t size)
return Buf_Create(dest, size, &g_Alloc);
}
#ifndef _WIN32
/* Lead-byte prefixes for UTF-8 sequences, indexed by
 * (number of continuation bytes - 1). */
static uint8_t kUtf8Limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

/* Converts srcLen UTF-16 code units from src into UTF-8.
 *
 * dest may be NULL, in which case nothing is stored and only the
 * byte count is computed. The number of bytes produced is always
 * written to *destLen. No capacity check is performed on dest.
 *
 * Returns True when all input was consumed, False when an unpaired
 * or out-of-order surrogate is encountered (in which case *destLen
 * is the number of bytes produced before the offending unit). */
static Bool Utf16_To_Utf8(uint8_t *dest, size_t *destLen,
      const uint16_t *src, size_t srcLen)
{
   size_t written = 0;
   size_t idx     = 0;

   while (idx != srcLen)
   {
      unsigned trail;
      uint32_t cp = src[idx++];

      /* Plain ASCII maps to a single byte. */
      if (cp < 0x80)
      {
         if (dest)
            dest[written] = (char)cp;
         written++;
         continue;
      }

      /* Surrogate range: a high surrogate must be followed
       * immediately by a low surrogate. */
      if (cp >= 0xD800 && cp < 0xE000)
      {
         uint32_t low;

         if (cp >= 0xDC00 || idx == srcLen)
         {
            *destLen = written;
            return False;
         }

         low = src[idx++];

         if (low < 0xDC00 || low >= 0xE000)
         {
            *destLen = written;
            return False;
         }

         cp = (((cp - 0xD800) << 10) | (low - 0xDC00)) + 0x10000;
      }

      /* Find how many continuation bytes this code point needs. */
      trail = 1;
      while (trail < 5 && !(cp < (((uint32_t)1) << (trail * 5 + 6))))
         trail++;

      /* Lead byte. */
      if (dest)
         dest[written] = (char)(kUtf8Limits[trail - 1]
               + (cp >> (6 * trail)));
      written++;

      /* Continuation bytes, most significant bits first. */
      while (trail != 0)
      {
         trail--;
         if (dest)
            dest[written] = (char)(0x80
                  + ((cp >> (6 * trail)) & 0x3F));
         written++;
      }
   }

   *destLen = written;
   return True;
}
/* Converts srcLen UTF-16 code units from src into a NUL-terminated
 * UTF-8 string stored in dest.
 *
 * A first pass with a NULL destination measures the output, the
 * buffer is then sized for that many bytes plus the terminator,
 * and a second pass performs the actual conversion.
 *
 * Returns SZ_ERROR_MEM if Buf_EnsureSize() reports failure,
 * SZ_ERROR_FAIL if the conversion pass reports failure,
 * SZ_OK otherwise. */
static SRes Utf16_To_Utf8Buf(CBuf *dest,
      const uint16_t *src, size_t srcLen)
{
   Bool   converted;
   size_t byteCount = 0;

   /* Measuring pass: result intentionally ignored, only the
    * count matters here. */
   Utf16_To_Utf8(NULL, &byteCount, src, srcLen);

   /* Reserve one extra byte for the terminating NUL. */
   if (!Buf_EnsureSize(dest, byteCount + 1))
      return SZ_ERROR_MEM;

   converted             = Utf16_To_Utf8(dest->data, &byteCount, src, srcLen);
   dest->data[byteCount] = 0;

   if (converted)
      return SZ_OK;
   return SZ_ERROR_FAIL;
}
#endif
static SRes Utf16_To_Char(CBuf *buf, const uint16_t *s, int fileMode)
static bool ConvertUtf16toCharString(const uint16_t *s, char *outstring)
{
CBuf buf;
bool res;
size_t out_chars = 0;
int len = 0;
Buf_Init(&buf);
for (len = 0; s[len] != '\0'; len++);
#ifdef _WIN32
utf16_conv_utf8(NULL, &out_chars, s, len);
out_chars += 1;
if (!Buf_EnsureSize(&buf, out_chars))
{
int size = len * 3 + 100;
if (!Buf_EnsureSize(buf, size))
return SZ_ERROR_MEM;
{
char defaultChar = '_';
BOOL defUsed;
int numChars = WideCharToMultiByte(fileMode ?
(
#ifdef UNDER_CE
CP_ACP
#else
AreFileApisANSI() ? CP_ACP : CP_OEMCP
#endif
) : CP_OEMCP,
0, (LPCWSTR)s, len, (char *)buf->data,
size, &defaultChar, &defUsed);
if (numChars == 0 || numChars >= size)
return SZ_ERROR_FAIL;
buf->data[numChars] = 0;
return SZ_OK;
}
res = SZ_ERROR_MEM;
goto end;
}
#else
(void)fileMode;
return Utf16_To_Utf8Buf(buf, s, len);
#endif
}
static SRes ConvertUtf16toCharString(const uint16_t *s, char *outstring)
{
CBuf buf;
SRes res;
Buf_Init(&buf);
res = Utf16_To_Char(&buf, s, 0);
res = utf16_conv_utf8(buf.data, &out_chars, s, len);
buf.data[out_chars] = 0;
if (res == SZ_OK)
strncpy(outstring, (const char*)buf.data, PATH_MAX_LENGTH);
end:
Buf_Free(&buf, &g_Alloc);
return res;
}
@ -267,7 +169,7 @@ static int read_7zip_file(
}
SzArEx_GetFileNameUtf16(&db, i, temp);
res = ConvertUtf16toCharString(temp,infile);
res = ConvertUtf16toCharString(temp,infile) ? SZ_OK : SZ_ERROR_FAIL;
if (!strcmp(infile, relative_path))
{
@ -418,7 +320,7 @@ static struct string_list *compressed_7zip_file_list_new(
}
}
SzArEx_GetFileNameUtf16(&db, i, temp);
res = ConvertUtf16toCharString(temp, infile);
res = ConvertUtf16toCharString(temp, infile) ? SZ_OK : SZ_ERROR_FAIL;
file_ext = path_get_extension(infile);
if (string_list_find_elem_prefix(ext_list, ".", file_ext))

View File

@ -23,6 +23,7 @@
#include <stdint.h>
#include <stddef.h>
#include <boolean.h>
#include <retro_inline.h>
static INLINE unsigned leading_ones(uint8_t c)
@ -73,3 +74,67 @@ size_t utf8_conv_utf32(uint32_t *out, size_t out_chars,
return ret;
}
/**
 * utf16_conv_utf8:
 * @out       : Destination for the UTF-8 bytes, or NULL to only
 *              measure how many bytes the conversion produces.
 * @out_chars : Receives the number of UTF-8 bytes produced (output
 *              only; its incoming value is not read).
 * @in        : UTF-16 code units to convert.
 * @in_size   : Number of code units to read from @in.
 *
 * Converts UTF-16 to UTF-8. Surrogate pairs are combined into a
 * single code point before encoding. No terminator is appended and
 * the capacity of @out is NOT checked: call once with @out == NULL
 * to obtain the required size, allocate, then call again.
 *
 * Returns: true if every code unit was converted. false if a low
 * surrogate appears without a preceding high surrogate, or a high
 * surrogate is not followed by a low surrogate; in that case
 * *out_chars is the number of bytes produced before the bad unit.
 **/
bool utf16_conv_utf8(uint8_t *out, size_t *out_chars,
      const uint16_t *in, size_t in_size)
{
   /* Lead-byte prefix for a sequence with N continuation bytes,
    * indexed by N - 1. Read-only, hence const. */
   static const uint8_t utf8_limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
   size_t out_pos = 0;
   size_t in_pos  = 0;

   for (;;)
   {
      unsigned num_adds;
      uint32_t value;

      if (in_pos == in_size)
      {
         *out_chars = out_pos;
         return true;
      }

      value = in[in_pos++];

      /* ASCII: one byte, no continuation. */
      if (value < 0x80)
      {
         if (out)
            out[out_pos] = (uint8_t)value;
         out_pos++;
         continue;
      }

      /* Surrogates: only "high followed by low" is valid. */
      if (value >= 0xD800 && value < 0xE000)
      {
         uint32_t c2;

         if (value >= 0xDC00 || in_pos == in_size)
            break;
         c2 = in[in_pos++];
         if (c2 < 0xDC00 || c2 >= 0xE000)
            break;
         value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000;
      }

      /* Number of continuation bytes: 1 for < 0x800,
       * 2 for < 0x10000, 3 for < 0x200000, ... */
      for (num_adds = 1; num_adds < 5; num_adds++)
         if (value < (((uint32_t)1) << (num_adds * 5 + 6)))
            break;

      /* Lead byte. Cast to uint8_t (the element type of out)
       * rather than char, whose signedness is
       * implementation-defined. */
      if (out)
         out[out_pos] = (uint8_t)(utf8_limits[num_adds - 1]
               + (value >> (6 * num_adds)));
      out_pos++;

      /* Continuation bytes, 6 payload bits each, high bits first. */
      do
      {
         num_adds--;
         if (out)
            out[out_pos] = (uint8_t)(0x80
                  + ((value >> (6 * num_adds)) & 0x3F));
         out_pos++;
      } while (num_adds != 0);
   }

   /* Reached only via break: invalid surrogate sequence. */
   *out_chars = out_pos;
   return false;
}

View File

@ -26,6 +26,8 @@
#include <stdint.h>
#include <stddef.h>
#include <boolean.h>
#ifdef __cplusplus
extern "C" {
#endif
@ -33,6 +35,9 @@ extern "C" {
size_t utf8_conv_utf32(uint32_t *out, size_t out_chars,
const char *in, size_t in_size);
/**
 * utf16_conv_utf8:
 * @out       : Destination for the UTF-8 bytes, or NULL to only
 *              count the bytes the conversion would produce.
 * @out_chars : Receives the number of UTF-8 bytes produced.
 * @in        : UTF-16 code units to convert.
 * @in_size   : Number of code units in @in.
 *
 * Converts UTF-16 to UTF-8, combining surrogate pairs. The output
 * is not NUL-terminated and the size of @out is not checked, so the
 * caller must size the buffer beforehand (e.g. via a first call
 * with @out set to NULL).
 *
 * Returns: true on success, false if an invalid surrogate sequence
 * is encountered.
 **/
bool utf16_conv_utf8(uint8_t *out, size_t *out_chars,
const uint16_t *in, size_t in_size);
#ifdef __cplusplus
}
#endif