1
0
mirror of https://gitlab.com/OpenMW/openmw.git synced 2025-01-25 06:35:30 +00:00
OpenMW/components/to_utf8/gen_iconv.cpp

125 lines
3.3 KiB
C++
Raw Normal View History

// This program generates the file tables_gen.hpp
#include <iostream>
#include <cassert>
2022-09-22 21:26:05 +03:00
#include <iconv.h>
2022-09-22 21:26:05 +03:00
void tab()
{
std::cout << " ";
}
// write one number with a space in front of it and a comma after it
void num(char i, bool last)
{
2022-09-22 21:26:05 +03:00
// Convert i to its integer value, i.e. -128 to 127. Printing it directly
// would result in non-printable characters in the source code, which is bad.
std::cout << " " << static_cast<int>(i);
if (!last)
std::cout << ",";
}
// Write one table entry (UTF8 value), 1-5 bytes
2022-09-22 21:26:05 +03:00
void writeChar(char* value, int length, bool last, const std::string& comment = "")
{
2022-09-22 21:26:05 +03:00
assert(length >= 1 && length <= 5);
tab();
num(length, false);
for (int i = 0; i < 5; i++)
num(value[i], last && i == 4);
2022-09-22 21:26:05 +03:00
if (comment != "")
std::cout << " // " << comment;
2022-09-22 21:26:05 +03:00
std::cout << std::endl;
}
// What to write on missing characters
void writeMissing(bool last)
{
2022-09-22 21:26:05 +03:00
// Just write a space character
char value[5];
value[0] = ' ';
for (int i = 1; i < 5; i++)
value[i] = 0;
writeChar(value, 1, last, "not part of this charset");
}
2022-09-22 21:26:05 +03:00
int write_table(const std::string& charset, const std::string& tableName)
{
2022-09-22 21:26:05 +03:00
// Write table header
std::cout << "const static signed char " << tableName << "[] =\n{\n";
2022-09-22 21:26:05 +03:00
// Open conversion system
iconv_t cd = iconv_open("UTF-8", charset.c_str());
2022-09-22 21:26:05 +03:00
// Convert each character from 0 to 255
for (int i = 0; i < 256; i++)
{
2022-09-22 21:26:05 +03:00
bool last = (i == 255);
2022-09-22 21:26:05 +03:00
char input = i;
char* iptr = &input;
size_t ileft = 1;
2022-09-22 21:26:05 +03:00
char output[5];
for (int k = 0; k < 5; k++)
output[k] = 0;
char* optr = output;
size_t oleft = 5;
2022-09-22 21:26:05 +03:00
size_t res = iconv(cd, &iptr, &ileft, &optr, &oleft);
2022-09-22 21:26:05 +03:00
if (res)
writeMissing(last);
else
writeChar(output, 5 - oleft, last);
}
2022-09-22 21:26:05 +03:00
iconv_close(cd);
2022-09-22 21:26:05 +03:00
// Finish table
std::cout << "};\n";
2022-09-22 21:26:05 +03:00
return 0;
}
int main()
{
2022-09-22 21:26:05 +03:00
// Write header guard
std::cout << "#ifndef COMPONENTS_TOUTF8_TABLE_GEN_H\n#define COMPONENTS_TOUTF8_TABLE_GEN_H\n\n";
2022-09-22 21:26:05 +03:00
// Write namespace
std::cout << "namespace ToUTF8\n{\n\n";
2022-09-22 21:26:05 +03:00
// Central European and Eastern European languages that use Latin script, such as
// Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian, Serbian (Latin script), Romanian and Albanian.
std::cout << "\n/// Central European and Eastern European languages that use Latin script,"
"\n/// such as Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian,"
"\n/// Serbian (Latin script), Romanian and Albanian."
"\n";
write_table("WINDOWS-1250", "windows_1250");
2022-09-22 21:26:05 +03:00
// Cyrillic alphabet such as Russian, Bulgarian, Serbian Cyrillic and other languages
std::cout << "\n/// Cyrillic alphabet such as Russian, Bulgarian, Serbian Cyrillic"
"\n/// and other languages"
"\n";
write_table("WINDOWS-1251", "windows_1251");
2022-09-22 21:26:05 +03:00
// English
std::cout << "\n/// Latin alphabet used by English and some other Western languages"
"\n";
write_table("WINDOWS-1252", "windows_1252");
2022-09-22 21:26:05 +03:00
write_table("CP437", "cp437");
2013-06-06 22:13:30 +02:00
2022-09-22 21:26:05 +03:00
// Close namespace
std::cout << "\n}\n\n";
2022-09-22 21:26:05 +03:00
// Close header guard
std::cout << "#endif\n\n";
2022-09-22 21:26:05 +03:00
return 0;
}