I am trying to convert cp1251 text to UTF-8. What I am doing here is building a buffer of the hex codes (byte values) of the given symbols in cp1251, so that I can later convert those bytes to UTF-8. The problem is that the converted string sometimes has trash symbols at the end.
The output of converting the same string many times (203 ñòåï ÒÖÍÐ.466219.007 Èíòåðàêòèâíûé êîìïëåêñ NextPanel 43/NAUO1):
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1омплекс NextPanelВ 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO14V
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1sгцf¤№
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1п}тУї0bЊ.Z¶ї¬ЁЌ€/ГїаА Om›Ґї
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1sгцf¤№
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1™™™™Щ?
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
theNamePrefix = 203 степ ТЦНР.466219.007 Интерактивный комплекс NextPanel 43/NAUO1
char *ConvertWindows1251ToUtf8(string stringToConvert)
{
// string tmpString = stringToConvert.ToCString();
const char *tmpCharArray = stringToConvert.c_str();
vector<char> charBuffer;
char *buffer = new char[char_traits<char>::length(tmpCharArray)];
int latter = 0;
for (int i = 0; i < std::char_traits<char>::length(tmpCharArray); i++)
{
string tmpHexLatter;
int hexLatter = 0xFF & tmpCharArray[i];
stringstream ss;
ss << hex << hexLatter;
tmpHexLatter = ss.str();
if (hexLatter != 0xc3)
{
if (hexLatter != 0x30 && hexLatter != 0x31 && hexLatter != 0x32 && hexLatter != 0x33 && hexLatter != 0x34 && hexLatter != 0x35 && hexLatter != 0x36 && hexLatter != 0x37 && hexLatter != 0x38 && hexLatter != 0x39 && hexLatter != 0x61 && hexLatter != 0x62 && hexLatter != 0x63 && hexLatter != 0x64 && hexLatter != 0x65 && hexLatter != 0x66 && hexLatter != 0x67 && hexLatter != 0x68 && hexLatter != 0x69 && hexLatter != 0x6A && hexLatter != 0x6B && hexLatter != 0x6C && hexLatter != 0x6D && hexLatter != 0x6E && hexLatter != 0x6F && hexLatter != 0x70 && hexLatter != 0x71 && hexLatter != 0x72 && hexLatter != 0x73 && hexLatter != 0x74 && hexLatter != 0x75 && hexLatter != 0x76 && hexLatter != 0x77 && hexLatter != 0x78 && hexLatter != 0x79 && hexLatter != 0x7A && hexLatter != 0x41 && hexLatter != 0x42 && hexLatter != 0x43 && hexLatter != 0x44 && hexLatter != 0x45 && hexLatter != 0x46 && hexLatter != 0x47 && hexLatter != 0x48 && hexLatter != 0x49 && hexLatter != 0x4A && hexLatter != 0x4B && hexLatter != 0x4C && hexLatter != 0x4D && hexLatter != 0x4E && hexLatter != 0x4F && hexLatter != 0x50 && hexLatter != 0x51 && hexLatter != 0x52 && hexLatter != 0x53 && hexLatter != 0x54 && hexLatter != 0x55 && hexLatter != 0x56 && hexLatter != 0x57 && hexLatter != 0x58 && hexLatter != 0x59 && hexLatter != 0x5A && hexLatter != 0x2E)
hexLatter += 64;
if (hexLatter == 0x60)
hexLatter = 0xA0;
if (hexLatter == 0x6F)
hexLatter = 0x2F;
stringstream ss;
ss << hex << hexLatter;
string tmpHex = ss.str();
tmpHexLatter = "0x" + tmpHex;
latter = stoi(tmpHexLatter, {}, 16);
charBuffer.push_back((char)latter);
}
}
for (int i = 0; i < charBuffer.size(); i++)
{
buffer[i] = charBuffer[i];
}
return g_convert(buffer, -1, "utf-8", "Windows-1251", NULL, NULL, NULL);
/*string tmpStr = stringToConvert.ToCString();
std::unique_ptr<gchar, void (*)(gpointer)> p(g_convert(tmpStr.c_str(), -1, "utf-8", "Windows-1251", NULL, NULL, NULL), g_free);
return TCollection_AsciiString(p.get());*/
}