Make base::string_to_lower()/string_to_upper() apply std::tolower()/std::toupper() to each Unicode char

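This fixes issue #1065: UTF-8 strings are no longer corrupted by the base::string_to_*() routines, which now convert the string with from_utf8(), change the case of each Unicode character (instead of each byte), and convert the result back with to_utf8(). The issue appears when std::setlocale() is used, a call that was introduced in 27b55030e26e93c5e8d9e7e21206c8709d46ff22.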
2025-04-16 23:42:57 +00:00 · 2016-04-18 16:38:37 -03:00 · 2016-04-18 16:38:37 -03:00 · fca32900bb
commit fca32900bb
parent 736df91f8c
2 changed files with 46 additions and 12 deletions
--- a/src/base/string.cpp
+++ b/src/base/string.cpp
@@ -1,5 +1,5 @@
 // Aseprite Base Library
-// Copyright (c) 2001-2013, 2015 David Capello
+// Copyright (c) 2001-2016 David Capello
 //
 // This file is released under the terms of the MIT license.
 // Read LICENSE.txt for more information.
@@ -21,22 +21,26 @@ namespace base {

 std::string string_to_lower(const std::string& original)
 {
-  std::string result(original);
-
-  for (std::string::iterator it=result.begin(); it!=result.end(); ++it)
+  std::wstring result(from_utf8(original));
+  auto it(result.begin());
+  auto end(result.end());
+  while (it != end) {
    *it = std::tolower(*it);
-
-  return result;
+    ++it;
+  }
+  return to_utf8(result);
 }

 std::string string_to_upper(const std::string& original)
 {
-  std::string result(original);
-
-  for (std::string::iterator it=result.begin(); it!=result.end(); ++it)
+  std::wstring result(from_utf8(original));
+  auto it(result.begin());
+  auto end(result.end());
+  while (it != end) {
    *it = std::toupper(*it);
-
-  return result;
+    ++it;
+  }
+  return to_utf8(result);
 }

 #ifdef _WIN32
--- a/src/base/string_tests.cpp
+++ b/src/base/string_tests.cpp
@@ -1,5 +1,5 @@
 // Aseprite Base Library
-// Copyright (c) 2001-2013 David Capello
+// Copyright (c) 2001-2016 David Capello
 //
 // This file is released under the terms of the MIT license.
 // Read LICENSE.txt for more information.
@@ -9,6 +9,7 @@
 #include "base/string.h"

 #include <algorithm>
+#include <clocale>

 using namespace base;

@@ -87,6 +88,35 @@ TEST(String, Utf8ICmp)
  EXPECT_EQ(1, utf8_icmp("z", "b", 2));
 }

+TEST(String, StringToLowerByUnicodeCharIssue1065)
+{
+  // Required to make old string_to_lower() version fail.
+  std::setlocale(LC_ALL, "en-US");
+
+  std::string  a = "\xC2\xBA";
+  std::wstring b = from_utf8(a);
+  std::string  c = to_utf8(b);
+
+  ASSERT_EQ(a, c);
+  ASSERT_EQ("\xC2\xBA", c);
+
+  ASSERT_EQ(1, utf8_length(a));
+  ASSERT_EQ(1, b.size());
+  ASSERT_EQ(1, utf8_length(c));
+
+  std::string d = string_to_lower(c);
+  ASSERT_EQ(a, d);
+  ASSERT_EQ(c, d);
+  ASSERT_EQ(1, utf8_length(d));
+
+  auto it = utf8_iterator(d.begin());
+  auto end = utf8_iterator(d.end());
+  int i = 0;
+  for (; it != end; ++it) {
+    ASSERT_EQ(b[i++], *it);
+  }
+}
+
 int main(int argc, char** argv)
 {
  ::testing::InitGoogleTest(&argc, argv);