Added custom UTF8 converter. Removed iconv dependency.

2025-01-30 12:32:36 +00:00 · 2010-08-18 18:45:44 +02:00 · 2010-08-18 18:45:44 +02:00 · 358e1ca5a5
commit 358e1ca5a5
parent 9a5e7816eb
11 changed files with 551 additions and 181 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -44,6 +44,12 @@ set(NIFOGRE_HEADER
    ${COMP_DIR}/nifogre/ogre_nif_loader.hpp)
 source_group(components\\nifogre FILES ${NIFOGRE} ${NIFOGRE_HEADER})

+# components/to_utf8: the custom UTF-8 converter that replaces the
+# former iconv dependency. Source and header are listed separately so
+# they can be appended to COMPONENTS and COMPONENTS_HEADER.
+set(TO_UTF8
+    ${COMP_DIR}/to_utf8/to_utf8.cpp)
+set(TO_UTF8_HEADER
+    ${COMP_DIR}/to_utf8/to_utf8.hpp)
+# Group both files under components\to_utf8 in IDE project views.
+source_group(components\\to_utf8 FILES ${TO_UTF8} ${TO_UTF8_HEADER})
+
 set(ESM_STORE
    ${COMP_DIR}/esm_store/store.cpp)
 set(ESM_STORE_HEADER
@ -75,10 +81,10 @@ file(GLOB INTERPRETER ${COMP_DIR}/interpreter/*.cpp)
 file(GLOB INTERPRETER_HEADER ${COMP_DIR}/interpreter/*.hpp)
 source_group(components\\interpreter FILES ${INTERPRETER} ${INTERPRETER_HEADER})

-set(COMPONENTS ${BSA} ${NIF} ${NIFOGRE} ${ESM_STORE} ${MISC}
+set(COMPONENTS ${BSA} ${NIF} ${NIFOGRE} ${ESM_STORE} ${MISC} ${TO_UTF8}
    ${COMPILER} ${INTERPRETER} ${ESM})
 set(COMPONENTS_HEADER ${BSA_HEADER} ${NIF_HEADER} ${NIFOGRE_HEADER} ${ESM_STORE_HEADER}
-    ${ESM_HEADER} ${MISC_HEADER} ${COMPILER_HEADER}
+    ${ESM_HEADER} ${MISC_HEADER} ${COMPILER_HEADER} ${TO_UTF8_HEADER}
    ${INTERPRETER_HEADER})

 # source directory: libs
@ -158,12 +164,10 @@ endif (WIN32)
 find_package(OGRE REQUIRED)
 find_package(Boost REQUIRED COMPONENTS system filesystem program_options thread)
 find_package(OIS REQUIRED)
-find_package(Iconv REQUIRED)
 find_package(OpenAL REQUIRED)
 include_directories("."
    ${OGRE_INCLUDE_DIR} ${OGRE_INCLUDE_DIR}/Ogre
    ${OIS_INCLUDE_DIR} ${Boost_INCLUDE_DIR}
-	${ICONV_INCLUDE_DIR}
    ${PLATFORM_INCLUDE_DIR}
    ${CMAKE_HOME_DIRECTORY}/extern/caelum/include
    ${CMAKE_HOME_DIRECTORY}/extern/mygui_3.0.1/MyGUIEngine/include
--- a/apps/esmtool/CMakeLists.txt
+++ b/apps/esmtool/CMakeLists.txt
@ -9,6 +9,7 @@ source_group(apps\\esmtool FILES ${ESMTOOL})
 add_executable(esmtool
  ${ESMTOOL}
  ${MISC} ${MISC_HEADER}
+  ${TO_UTF8}
 )

 target_link_libraries(esmtool
--- a/apps/openmw/CMakeLists.txt
+++ b/apps/openmw/CMakeLists.txt
@ -199,7 +199,6 @@ target_link_libraries(openmw
  ${Boost_LIBRARIES}
  ${OPENAL_LIBRARY}
  ${SOUND_INPUT_LIBRARY}
-  ${ICONV_LIBRARIES}
  caelum
  MyGUIEngine
  MyGUI.OgrePlatform
--- a/cmake/FindIconv.cmake
+++ b/cmake/FindIconv.cmake
@ -1,69 +0,0 @@
-# - Try to find Iconv 
-# Once done this will define 
-# 
-#  ICONV_FOUND - system has Iconv 
-#  ICONV_INCLUDE_DIR - the Iconv include directory 
-#  ICONV_LIBRARIES - Link these to use Iconv 
-#  ICONV_SECOND_ARGUMENT_IS_CONST - the second argument for iconv() is const
-# 
-include(CheckCCompilerFlag)
-include(CheckCXXSourceCompiles)
-
-IF (ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)
-  # Already in cache, be silent
-  SET(ICONV_FIND_QUIETLY TRUE)
-ENDIF (ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)
-
-IF(WIN32)
-    SET(ICONV_INCLUDE_DIR $ENV{ICONV_INCLUDE_DIR})
-    SET(ICONV_LIBRARIES $ENV{ICONV_LIBRARIES})
-ENDIF(WIN32)
-
-FIND_PATH(ICONV_INCLUDE_DIR iconv.h) 
- 
-FIND_LIBRARY(ICONV_LIBRARIES NAMES iconv libiconv c)
- 
-IF(ICONV_INCLUDE_DIR AND ICONV_LIBRARIES) 
-   SET(ICONV_FOUND TRUE) 
-ENDIF(ICONV_INCLUDE_DIR AND ICONV_LIBRARIES) 
-
-set(CMAKE_REQUIRED_INCLUDES ${ICONV_INCLUDE_DIR})
-set(CMAKE_REQUIRED_LIBRARIES ${ICONV_LIBRARIES})
-IF(ICONV_FOUND)
-  check_c_compiler_flag("-Werror" ICONV_HAVE_WERROR)
-  set (CMAKE_C_FLAGS_BACKUP "${CMAKE_C_FLAGS}")
-  if(ICONV_HAVE_WERROR)
-    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror")
-  endif(ICONV_HAVE_WERROR)
-  check_c_source_compiles("
-  #include <iconv.h>
-  int main(){
-    iconv_t conv = 0;
-    const char* in = 0;
-    size_t ilen = 0;
-    char* out = 0;
-    size_t olen = 0;
-    iconv(conv, &in, &ilen, &out, &olen);
-    return 0;
-  }
-" ICONV_SECOND_ARGUMENT_IS_CONST )
-  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS_BACKUP}")
-ENDIF(ICONV_FOUND)
-set(CMAKE_REQUIRED_INCLUDES)
-set(CMAKE_REQUIRED_LIBRARIES)
-
-IF(ICONV_FOUND) 
-  IF(NOT ICONV_FIND_QUIETLY) 
-    MESSAGE(STATUS "Found Iconv: ${ICONV_LIBRARIES}") 
-  ENDIF(NOT ICONV_FIND_QUIETLY) 
-ELSE(ICONV_FOUND) 
-  IF(Iconv_FIND_REQUIRED) 
-    MESSAGE(FATAL_ERROR "Could not find Iconv") 
-  ENDIF(Iconv_FIND_REQUIRED) 
-ENDIF(ICONV_FOUND) 
-
-MARK_AS_ADVANCED(
-  ICONV_INCLUDE_DIR
-  ICONV_LIBRARIES
-  ICONV_SECOND_ARGUMENT_IS_CONST
-)
--- a/components/esm/esm_reader.hpp
+++ b/components/esm/esm_reader.hpp
@ -3,22 +3,18 @@

 #include <string>
 #include <libs/platform/stdint.h>
-#include <string.h>
 #include <assert.h>
 #include <vector>
 #include <sstream>
-#include <iomanip>
-#include <errno.h>
-
-#ifndef __WIN32__
-    #include <iconv.h>
-#endif
+#include <string.h>

 #include <libs/mangle/stream/stream.hpp>
 #include <libs/mangle/stream/servers/file_stream.hpp>
 #include <libs/mangle/tools/str_exception.hpp>
 #include <components/misc/stringops.hpp>

+#include <components/to_utf8/to_utf8.hpp>
+
 #ifdef __APPLE__
 // need our own implementation of strnlen
 static size_t strnlen(const char *s, size_t n)
@ -603,112 +599,17 @@ public:
  void getName(NAME &name) { getT(name); }
  void getUint(uint32_t &u) { getT(u); }

-  // Read the next size bytes and return them as a string
+  // Read the next 'size' bytes and return them as a string. Converts
+  // them from native encoding to UTF8 in the process.
  std::string getString(int size)
  {
-    // Not very optimized, but we can fix that later
-    char *ptr = new char[size];
+    char *ptr = ToUTF8::getBuffer(size);
    esm->read(ptr,size);

-    // Remove any zero terminators
-    for(int i=0; i<size; i++)
-      if(ptr[i] == 0)
-        size = i;
-
-    // Convert to std::string and return
-    std::string res(ptr,size);
-    delete[] ptr;
-    return convertToUTF8(res);
+    // Convert to UTF8 and return
+    return ToUTF8::getUtf8(ToUTF8::WINDOWS_1252);
  }

-    // Convert a string from the encoding used by Morrowind to UTF-8
-    std::string convertToUTF8 (std::string input)
-    {
-#ifdef __WIN32__
-        return input;
-#else
-        std::string output = "";
-
-        //create convert description
-        iconv_t cd = iconv_open ("UTF-8", "WINDOWS-1252");
-
-        if (cd == (iconv_t)-1)  //error handling
-        {
-            std::string errMsg = "Creating description for UTF-8 converting failed: ";
-
-            switch (errno)   //detailed error messages (maybe it contains too much detail :)
-            {
-            case EMFILE:
-                errMsg += "{OPEN_MAX} files descriptors are currently open in the calling process.";
-            case ENFILE:
-                errMsg += "Too many files are currently open in the system.";
-            case ENOMEM:
-                errMsg +="Insufficient storage space is available.";
-            case EINVAL:
-                errMsg += "The conversion specified by fromcode and tocode is not supported by the implementation.";
-
-            default:
-                errMsg += "Unknown Error\n";
-            }
-
-            fail (errMsg);
-
-        }
-        else
-        {
-            const size_t inputSize = input.size();
-
-            if (inputSize)  //input is not empty
-            {
-                //convert function doesn't accept const char *, therefore copy content into an char *
-                std::vector<char> inputBuffer (input.begin(), input.end());
-                char *inputBufferBegin = &inputBuffer[0];
-
-                size_t inputBytesLeft = inputSize; //bytes to convert
-
-                static const size_t outputSize = 1000;
-                size_t outputBytesLeft;
-
-                char outputBuffer[outputSize];
-                char *outputBufferBegin;
-
-                while (inputBytesLeft > 0)
-                {
-                    outputBytesLeft = outputSize;
-                    outputBufferBegin = outputBuffer;
-
-                    if (iconv (cd, &inputBufferBegin, &inputBytesLeft, &outputBufferBegin, &outputBytesLeft) == (size_t)-1)
-                    {
-                        switch (errno)
-                        {
-                        case E2BIG: //outputBuffer is full
-                            output += std::string (outputBuffer, outputSize);
-                            break;
-                        case EILSEQ:
-                            fail ("Iconv: Invalid multibyte sequence.\n");
-                            break;
-                        case EINVAL:
-                            fail ("Iconv: Incomplete multibyte sequence.\n");
-                            break;
-                        default:
-                            fail ("Iconv: Unknown Error\n");
-                        }
-
-                    }
-                }
-
-                //read only relevant bytes from outputBuffer
-                output += std::string (outputBuffer, outputSize - outputBytesLeft);
-
-            }
-        }
-
-        iconv_close (cd);
-
-        return output;
-    }
-#endif
-
  void skip(int bytes) { esm->seek(esm->tell()+bytes); }
  uint64_t getOffset() { return esm->tell(); }

--- a/components/to_utf8/.gitignore
+++ b/components/to_utf8/.gitignore
@ -0,0 +1 @@
+gen_iconv
--- a/components/to_utf8/Makefile
+++ b/components/to_utf8/Makefile
@ -0,0 +1,5 @@
+# Regenerate the pregenerated conversion tables by running the
+# generator and capturing its standard output. The generator is
+# invoked as ./gen_iconv because the current directory is normally
+# not on PATH, so a bare 'gen_iconv' would not be found.
+tables_gen.hpp: gen_iconv
+	./gen_iconv > $@
+
+# Build the table generator (needs iconv on the build machine only).
+gen_iconv: gen_iconv.cpp
+	g++ -Wall $^ -o $@
+
+# If the generator fails, delete the partially written header so that
+# the next 'make' run does not treat a truncated file as up to date.
+.DELETE_ON_ERROR:
--- a/components/to_utf8/gen_iconv.cpp
+++ b/components/to_utf8/gen_iconv.cpp
@ -0,0 +1,86 @@
+// This program generates the file tables_gen.hpp
+
+#include <iostream>
+#include <iomanip>
+using namespace std;
+
+#include <iconv.h>
+#include <assert.h>
+
+// Emit the three-space indentation that starts every table row.
+void tab()
+{
+  static const char indent[] = "   ";
+  cout << indent;
+}
+
+// Print a single table cell: a leading space, the "0x" prefix and the
+// numeric value of 'i', followed by a comma unless 'last' is set.
+// The number base is whatever cout is currently configured for (main
+// switches it to hexadecimal).
+void num(unsigned char i, bool last)
+{
+  const unsigned value = i;
+  cout << " 0x" << value << (last ? "" : ",");
+}
+
+// Write one table row: the length byte followed by exactly five data
+// bytes taken from 'value' (so 'value' must point to at least five
+// readable bytes), an optional trailing "// comment", and a newline.
+// 'length' is the number of meaningful bytes (1-5). When 'last' is
+// set, no comma is written after the final byte of the row.
+void writeChar(char *value, int length, bool last, const std::string &comment="")
+{
+  assert(1 <= length && length <= 5);
+
+  tab();
+  num(length, false);
+
+  // The first four data bytes are always followed by a comma; only
+  // the fifth one depends on whether this is the last row.
+  for(int k=0; k<4; ++k)
+    num(value[k], false);
+  num(value[4], last);
+
+  if(!comment.empty())
+    cout << " // " << comment;
+
+  cout << endl;
+}
+
+// Row written for input bytes that the charset does not define: a
+// single space character, annotated with an explanatory comment.
+void writeMissing(bool last)
+{
+  // One space followed by four zero padding bytes
+  char blank[5] = { ' ', 0, 0, 0, 0 };
+  writeChar(blank, 1, last, "not part of this charset");
+}
+
+// Write a complete C++ lookup table named 'tableName' to standard
+// output. The table has one 6-byte row (length + 5 data bytes) for
+// each of the 256 possible input bytes, holding the UTF-8 encoding of
+// that byte when interpreted in 'charset'. Bytes that iconv cannot
+// convert are written as a single space (see writeMissing).
+//
+// Returns 0 on success, or 1 if iconv does not support the conversion
+// (in which case nothing is written to standard output).
+int write_table(const std::string &charset, const std::string &tableName)
+{
+  // Open conversion system first, so that an unsupported charset does
+  // not leave a half-written table behind.
+  iconv_t cd = iconv_open ("UTF-8", charset.c_str());
+  if(cd == (iconv_t)-1)
+    {
+      cerr << "gen_iconv: cannot convert from " << charset
+           << " to UTF-8" << endl;
+      return 1;
+    }
+
+  // Write table header
+  cout << "static char " << tableName << "[] =\n{\n";
+
+  // Convert each character from 0 to 255
+  for(int i=0; i<256; i++)
+    {
+      bool last = (i==255);
+
+      char input = i;
+      char *iptr = &input;
+      size_t ileft = 1;
+
+      char output[5];
+      for(int k=0; k<5; k++) output[k] = 0;
+      char *optr = output;
+      size_t oleft = 5;
+
+      size_t res = iconv(cd, &iptr, &ileft, &optr, &oleft);
+
+      // Any non-zero result (an error, or a non-reversible
+      // conversion) is treated as a missing character.
+      if(res != 0) writeMissing(last);
+      else writeChar(output, 5-oleft, last);
+    }
+
+  iconv_close (cd);
+
+  // Finish table
+  cout << "};\n";
+
+  // The function is declared to return int; flowing off the end
+  // without a value would be undefined behavior.
+  return 0;
+}
+
+// Entry point: prints the generated tables to standard output.
+int main()
+{
+  // All numbers in the generated tables are written in hexadecimal
+  cout.setf(ios::hex, ios::basefield);
+
+  // English
+  write_table("WINDOWS-1252", "windows_1252");
+
+  return 0;
+}
--- a/components/to_utf8/tables_gen.hpp
+++ b/components/to_utf8/tables_gen.hpp
@ -0,0 +1,259 @@
+static char windows_1252[] =
+{
+    0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x1, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x2, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x3, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x4, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x5, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x6, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x7, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x8, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x9, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0xa, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0xb, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0xc, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0xd, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0xe, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0xf, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x10, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x11, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x12, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x13, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x14, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x15, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x16, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x17, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x18, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x19, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x1a, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x1b, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x1c, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x1d, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x1e, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x1f, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x20, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x21, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x22, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x23, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x24, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x25, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x26, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x27, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x28, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x29, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x2a, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x2b, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x2c, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x2d, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x2e, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x2f, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x30, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x31, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x32, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x33, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x34, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x35, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x36, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x37, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x38, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x39, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x3a, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x3b, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x3c, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x3d, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x3e, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x3f, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x40, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x41, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x42, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x43, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x44, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x45, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x46, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x47, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x48, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x49, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x4a, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x4b, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x4c, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x4d, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x4e, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x4f, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x50, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x51, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x52, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x53, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x54, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x55, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x56, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x57, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x58, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x59, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x5a, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x5b, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x5c, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x5d, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x5e, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x5f, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x60, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x61, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x62, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x63, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x64, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x65, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x66, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x67, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x68, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x69, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x6a, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x6b, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x6c, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x6d, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x6e, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x6f, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x70, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x71, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x72, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x73, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x74, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x75, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x76, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x77, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x78, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x79, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x7a, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x7b, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x7c, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x7d, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x7e, 0x0, 0x0, 0x0, 0x0,
+    0x1, 0x7f, 0x0, 0x0, 0x0, 0x0,
+    0x3, 0xe2, 0x82, 0xac, 0x0, 0x0,
+    0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
+    0x3, 0xe2, 0x80, 0x9a, 0x0, 0x0,
+    0x2, 0xc6, 0x92, 0x0, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0x9e, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0xa6, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0xa0, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0xa1, 0x0, 0x0,
+    0x2, 0xcb, 0x86, 0x0, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0xb0, 0x0, 0x0,
+    0x2, 0xc5, 0xa0, 0x0, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0xb9, 0x0, 0x0,
+    0x2, 0xc5, 0x92, 0x0, 0x0, 0x0,
+    0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
+    0x2, 0xc5, 0xbd, 0x0, 0x0, 0x0,
+    0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
+    0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
+    0x3, 0xe2, 0x80, 0x98, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0x99, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0x9c, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0x9d, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0xa2, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0x93, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0x94, 0x0, 0x0,
+    0x2, 0xcb, 0x9c, 0x0, 0x0, 0x0,
+    0x3, 0xe2, 0x84, 0xa2, 0x0, 0x0,
+    0x2, 0xc5, 0xa1, 0x0, 0x0, 0x0,
+    0x3, 0xe2, 0x80, 0xba, 0x0, 0x0,
+    0x2, 0xc5, 0x93, 0x0, 0x0, 0x0,
+    0x1, 0x20, 0x0, 0x0, 0x0, 0x0, // not part of this charset
+    0x2, 0xc5, 0xbe, 0x0, 0x0, 0x0,
+    0x2, 0xc5, 0xb8, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xa0, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xa1, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xa2, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xa3, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xa4, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xa5, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xa6, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xa7, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xa8, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xa9, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xaa, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xab, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xac, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xad, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xae, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xaf, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xb0, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xb1, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xb2, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xb3, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xb4, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xb5, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xb6, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xb7, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xb8, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xb9, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xba, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xbb, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xbc, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xbd, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xbe, 0x0, 0x0, 0x0,
+    0x2, 0xc2, 0xbf, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x80, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x81, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x82, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x83, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x84, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x85, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x86, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x87, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x88, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x89, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x8a, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x8b, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x8c, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x8d, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x8e, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x8f, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x90, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x91, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x92, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x93, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x94, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x95, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x96, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x97, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x98, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x99, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x9a, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x9b, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x9c, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x9d, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x9e, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0x9f, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xa0, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xa1, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xa2, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xa3, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xa4, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xa5, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xa6, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xa7, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xa8, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xa9, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xaa, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xab, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xac, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xad, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xae, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xaf, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xb0, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xb1, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xb2, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xb3, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xb4, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xb5, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xb6, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xb7, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xb8, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xb9, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xba, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xbb, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xbc, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xbd, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xbe, 0x0, 0x0, 0x0,
+    0x2, 0xc3, 0xbf, 0x0, 0x0, 0x0
+};
--- a/components/to_utf8/to_utf8.cpp
+++ b/components/to_utf8/to_utf8.cpp
@ -0,0 +1,159 @@
+#include "to_utf8.hpp"
+
+#include <vector>
+#include <assert.h>
+
+/* This file contains the code to translate from WINDOWS-1252 (native
+   charset used in English version of Morrowind) to UTF-8. The library
+   is designed to be extended to support more source encodings later,
+   which means that we may add support for Russian, Polish and Chinese
+   files and so on.
+
+   The code does not depend on any external library at
+   runtime. Instead, it uses a pregenerated table made with iconv (see
+   gen_iconv.cpp and the Makefile) which is located in tables_gen.hpp.
+
+   This is both faster and uses fewer dependencies. The tables would
+   only need to be regenerated if we are adding support for more input
+   encodings. As such, there is no need to make the generator code
+   platform independent.
+
+   The library is optimized for the case of pure ASCII input strings,
+   which is the vast majority of cases at least for the English
+   version. A test of my version of Morrowind.esm got 130 non-ASCII vs
+   236195 ASCII strings, or less than 0.06% of strings containing
+   non-ASCII characters.
+
+   To optimize for this, if the first pass of the string does not find
+   any non-ASCII characters, the entire string is passed along without
+   any modification.
+
+   Most of the non-ASCII strings are books, and are quite large. (The
+   non-ASCII characters are typically starting and ending quotation
+   marks.) Within these, almost all the characters are ASCII. For this
+   purpose, the library is also optimized for mostly-ASCII contents
+   even in the cases where some conversion is necessary.
+ */
+
+
+// Generated tables
+#include "tables_gen.hpp"
+
+// Shared global buffers, we love you.
+static std::vector<char> buf;
+static std::vector<char> output;
+static int size;
+
+// Make sure the given vector is large enough for 'size' bytes plus a
+// terminating zero, and write that zero at index 'size'. Existing
+// contents below 'size' are left untouched.
+static void resize(std::vector<char> &buf, size_t size)
+{
+  if(buf.size() <= size)
+    // Add some extra padding to reduce the chance of having to resize
+    // again later. The "+ 1" guarantees room for the terminator even
+    // when size is 0 (3*0 would otherwise leave the vector empty and
+    // make the write below go out of bounds).
+    buf.resize(3*size + 1);
+
+  // And make sure the string is zero terminated
+  buf[size] = 0;
+}
+
+// Hand out the shared, reusable input buffer for the conversion
+// process. The requested size is stored in the file-level 'size'
+// variable, which getUtf8() reads later; the buffer is grown as
+// needed and zero terminated at that position.
+char *ToUTF8::getBuffer(int s)
+{
+  size = s;             // remembered for getUtf8()
+  resize(buf, s);
+  return &buf.front();
+}
+
+/** Compute the number of bytes needed to hold the zero-terminated
+    string 'input' after translation with the array 'arr'. The array
+    uses 6 bytes per input character: the first is the length of the
+    translated sequence and the next 5 hold the actual data.
+
+    As an optimization the same pass also detects pure ASCII input:
+    'ascii' is set to true if every byte is <= 127 and to false
+    otherwise, so that the caller can skip the conversion entirely.
+ */
+static size_t getLength(const char *arr, const char* input, bool &ascii)
+{
+  size_t total = 0;
+  bool pureAscii = true;
+
+  // Walk the input as unsigned bytes so that values above 127 index
+  // the table correctly.
+  for(const unsigned char *p = reinterpret_cast<const unsigned char*>(input);
+      *p != 0; ++p)
+    {
+      if(*p >= 128) pureAscii = false;
+      total += arr[*p * 6];
+    }
+
+  ascii = pureAscii;
+  return total;
+}
+
+// Append the translation of the single input byte 'ch' to the output
+// and advance 'out' past the bytes written. Values below 128 are
+// copied through unchanged; all other values are looked up in the
+// translation array 'arr' (6 bytes per entry: length, then data).
+static void copyFromArray(const char *arr, unsigned char ch, char* &out)
+{
+  if(ch >= 128)
+    {
+      // Table lookup: entry[0] is the length, the data follows it
+      const char *entry = arr + ch*6;
+      const char *src = entry + 1;
+      for(int left = entry[0]; left > 0; --left)
+        *out++ = *src++;
+    }
+  else
+    {
+      // Fast path for ASCII values
+      *out++ = ch;
+    }
+}
+
+// Convert the contents of the shared input buffer (the one handed out
+// by getBuffer() and filled in by the caller) from the code page
+// 'from' to UTF-8 and return the result.
+//
+// Only the bytes up to the first zero byte are converted. An empty
+// string is returned if the input buffer is empty (for example when
+// getBuffer() has not been called yet) or if 'from' is not a
+// supported code page.
+std::string ToUTF8::getUtf8(ToUTF8::FromType from)
+{
+  // With an empty input buffer there is nothing to convert, and
+  // taking &buf[0] of an empty vector would be undefined behavior.
+  if(buf.empty())
+    return std::string();
+
+  // Pick translation array
+  const char *arr = 0;
+  if(from == ToUTF8::WINDOWS_1252)
+    arr = windows_1252;
+
+  // Unknown code page: fail loudly in debug builds, and return an
+  // empty result instead of reading through an uninitialized pointer
+  // when asserts are compiled out.
+  assert(arr != 0);
+  if(arr == 0)
+    return std::string();
+
+  // Double check that the input string stops at some point (it might
+  // contain zero terminators before this, inside its own data, which
+  // is also ok.)
+  const char* input = &buf[0];
+  assert(input[size] == 0);
+
+  // TODO: The rest of this function is designed for single-character
+  // input encodings only. It also assumes that the input encoding
+  // shares its first 128 values (0-127) with ASCII. These conditions
+  // must be checked again if you add more input encodings later.
+
+  // Compute output length, and check for pure ascii input at the same
+  // time.
+  bool ascii;
+  size_t outlen = getLength(arr, input, ascii);
+
+  // If we're pure ascii, then don't bother converting anything.
+  if(ascii)
+    return std::string(input, outlen);
+
+  // Make sure the output is large enough
+  resize(output, outlen);
+  char *out = &output[0];
+
+  // Translate
+  while(*input)
+    copyFromArray(arr, *(input++), out);
+
+  // Make sure that we wrote the correct number of bytes
+  assert((out-&output[0]) == (int)outlen);
+
+  // And make extra sure the output is null terminated
+  assert(output.size() > outlen);
+  assert(output[outlen] == 0);
+
+  // Return a string
+  return std::string(&output[0], outlen);
+}
+
+
--- a/components/to_utf8/to_utf8.hpp
+++ b/components/to_utf8/to_utf8.hpp
@ -0,0 +1,24 @@
+#ifndef COMPONENTS_TOUTF8_H
+#define COMPONENTS_TOUTF8_H
+
+#include <string>
+
+// Conversion of text from a single-byte code page to UTF-8.
+//
+// Usage is a two-step protocol: call getBuffer() to obtain an input
+// buffer, write the raw bytes into it, then call getUtf8() to get the
+// converted string.
+//
+// NOTE(review): the implementation in to_utf8.cpp keeps the buffers in
+// file-level static variables, so these functions look non-reentrant
+// and not thread-safe - confirm before calling them from more than one
+// thread.
+namespace ToUTF8
+{
+  // These are all the currently supported code pages
+  enum FromType
+    {
+      WINDOWS_1252      // Used by English version of Morrowind (and
+                        // probably others)
+    };
+
+  // Return a writable buffer of at least 'size' bytes. The buffer
+  // does not have to be freed. The implementation remembers 'size'
+  // and places a zero terminator directly after the requested bytes.
+  // The same buffer is reused by later calls, so the returned pointer
+  // should not be kept across another getBuffer() call.
+  char* getBuffer(int size);
+
+  // Convert the previously written buffer to UTF8 from the given code
+  // page. Conversion stops at the first zero byte in the buffer, so
+  // any bytes after an embedded zero are not part of the result.
+  std::string getUtf8(FromType from);
+}
+
+#endif