From 71e5a15298123055a10d4afafe6fe4e7b58f8b0d Mon Sep 17 00:00:00 2001 From: Nicolay Korslund Date: Thu, 16 Sep 2010 10:24:45 +0200 Subject: [PATCH] Minor improvement to to_utf8 --- components/to_utf8/to_utf8.cpp | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/components/to_utf8/to_utf8.cpp b/components/to_utf8/to_utf8.cpp index 14a7ff1cce..6f69331439 100644 --- a/components/to_utf8/to_utf8.cpp +++ b/components/to_utf8/to_utf8.cpp @@ -83,12 +83,28 @@ static size_t getLength(const char *arr, const char* input, bool &ascii) { ascii = true; size_t len = 0; - unsigned char inp = *input; - while(inp) + const char* ptr = input; + unsigned char inp = *ptr; + + // Do away with the ascii part of the string first (this is almost + // always the entire string.) + while(inp && inp < 128) + inp = *(++ptr); + len += (ptr-input); + + // If we're not at the null terminator at this point, then there + // were some non-ascii characters to deal with. Go to slow-mode for + // the rest of the string. + if(inp) { - if(inp > 127) ascii = false; - len += arr[inp*6]; - inp = *(++input); + ascii = false; + while(inp) + { + // Find the translated length of this character in the + // lookup table. + len += arr[inp*6]; + inp = *(++ptr); + } } return len; }