1
0
mirror of https://gitlab.com/OpenMW/openmw.git synced 2025-02-04 03:40:14 +00:00

Use a switch-case instead of a map, on elsid@'s advice

elsid@ said:

> From my measurements static map + exceptions is slower than original code
when loading polish morrowind localization by 25% with GCC 10.2.0. Switch-based
solution is about 25x times faster than the original and static map with find
only is ~20x faster.
This commit is contained in:
jvoisin 2021-05-21 12:56:46 +02:00
parent 770f91de77
commit 5470b3168b

View File

@ -75,63 +75,55 @@ namespace
return unicode;
}
/// Hack for the Polish font: remaps a byte taken from the font data before it
/// is handed to the win1250 conversion.
///
/// Bytes flagged as "not contained in win1250" are turned into 0x00; a byte
/// with no entry below is returned untouched.
unsigned char mapUtf8Char(unsigned char c) {
    switch (c)
    {
        // Not contained in win1250.
        case 0x8a: case 0x8b: case 0x9c:
        case 0xa4: case 0xe3: case 0xf5:
            return 0x00;

        // Source bytes that share a target.
        case 0x88: case 0x89:
            return 0xea;
        case 0x8c: case 0x97:
            return 0x8f;
        case 0x8d: case 0x96: case 0xa1:
            return 0xaf;
        case 0x85: case 0xa0:
            return 0xb9;
        case 0x86: case 0xa3:
            return 0xbf;
        case 0x95: case 0xa2:
            return 0xf3;
        // The table this was taken from listed the 0xe1 entry twice; it is not
        // known whether the second one was meant to read 0xe2 or was just an
        // extraneous copy-paste, so only 0xe1 is mapped.
        case 0x8f: case 0xe1:
            return 0x8c;

        // Source bytes with a target of their own.
        case 0x80: return 0xc6;
        case 0x81: return 0x9c;
        case 0x82: return 0xe6;
        case 0x83: return 0xb3;
        case 0x84: return 0xf1;
        case 0x87: return 0x9f;
        case 0x8e: return 0xa5;
        case 0x90: return 0xca;
        case 0x93: return 0xa3;
        case 0x94: return 0xf6;
        case 0x99: return 0xd3;
        case 0x9a: return 0xd1;

        default:
            break;
    }

    // No special handling: keep the byte as it is.
    return c;
}
// getUtf8, aka the worst function ever written.
// This includes various hacks for dealing with Morrowind's .fnt files that are *mostly*
// in the expected win12XX encoding, but also have randomly swapped characters sometimes.
// Looks like the Morrowind developers found standard encodings too boring and threw in some twists for fun.
//
// c        - the single byte taken from the font data
// encoder  - used to produce the returned string (via encoder.getUtf8)
// encoding - source encoding; only ToUTF8::WINDOWS_1250 gets special treatment
//
// Returns whatever encoder.getUtf8 yields for a one-character string holding the
// (possibly remapped) byte.
std::string getUtf8 (unsigned char c, ToUTF8::Utf8Encoder& encoder, ToUTF8::FromType encoding)
{
    // Hack for polish font: its bytes are remapped through mapUtf8Char first.
    // A plain switch is used for the lookup instead of a std::map with
    // at() + catch, because most bytes have no entry and throwing for each of
    // them is very expensive.
    const unsigned char byte = (encoding == ToUTF8::WINDOWS_1250) ? mapUtf8Char(c) : c;
    return encoder.getUtf8(std::string(1, byte));
}