I found another solution, better than before.
Some e-mail subjects use different encodings. I noticed:
- Latin2, encoded like: =?ISO-8859-2?Q?...?=
- UTF-8 Base64 like:
=?utf-8?B?Wm9iYWN6Y2llIGNvIGRsYSBXYXMgcHJ6eWdvdG93YWxpxZtteSAvIHN0eWN6ZcWEIHcgTGFzZXJwYXJrdQ==?=
- UTF-8 quoted printable like:
=?utf-8?Q?...?=
- No encoding (if only ASCII characters) like: ...
So with POCO (Base64Decoder, Latin2Encoding, UTF8Encoding, QuotedPrintableDecoder) I managed to convert all the cases:
#include <cctype>
#include <cstring>
#include <exception>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <Poco/Base64Decoder.h>
#include <Poco/Latin2Encoding.h>
#include <Poco/UTF8Encoding.h>
#include <Poco/Net/MailMessage.h>
#include <Poco/Net/MessageHeader.h>
#include <Poco/Net/POP3ClientSession.h>
#include <Poco/Net/QuotedPrintableDecoder.h>
using namespace std;
class Encoder
{
public:
Encoder(const string& encodedText)
{
isStringEncoded = isEncoded(encodedText);
if (!isStringEncoded)
{
extractedEncodedSubjectToConvert = encodedText;
return;
}
splitEncodedText(encodedText);
}
string convert()
{
if (isStringEncoded)
{
if (Poco::Latin2Encoding().isA(charset))
return decodeFromLatin2();
if (Poco::UTF8Encoding().isA(charset))
return decodeFromUtf8();
}
return extractedEncodedSubjectToConvert;
}
private:
void splitEncodedText(const string& encodedText)
{
/// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
const int charsetBeginPosition = strlen(sequenceBeginEncodedText);
const int charsetEndPosition = encodedText.find("?", charsetBeginPosition);
charset = encodedText.substr(charsetBeginPosition, charsetEndPosition-charsetBeginPosition);
const int encodingPosition = charsetEndPosition + strlen("?");
encoding = encodedText[encodingPosition];
const int lenghtOfEncodedText = encodedText.length() - encodingPosition-strlen(sequenceBeginEncodedText)-strlen(sequenceEndEncodedText);
extractedEncodedSubjectToConvert = encodedText.substr(encodingPosition+2, lenghtOfEncodedText);
}
bool isEncoded(const string& encodedSubject)
{
if (encodedSubject.size() < 4)
return false;
if (0 != encodedSubject.find(sequenceBeginEncodedText))
return false;
const unsigned positionOfLastTwoCharacters = encodedSubject.size() - strlen(sequenceEndEncodedText);
return positionOfLastTwoCharacters == encodedSubject.rfind(sequenceEndEncodedText);
}
string decodeFromLatin2()
{
size_t positionOfAssignment = -1;
while (true)
{
positionOfAssignment = extractedEncodedSubjectToConvert.find('=', positionOfAssignment+1);
if (string::npos != positionOfAssignment)
{
const string& charHexCode = extractedEncodedSubjectToConvert.substr(positionOfAssignment + 1, 2);
replaceAllSubstringsWithUnicode(extractedEncodedSubjectToConvert, charHexCode);
}
else
break;
}
return extractedEncodedSubjectToConvert;
}
void replaceAllSubstringsWithUnicode(string& s, const string& charHexCode)
{
static Poco::UTF8Encoding encodingConverter;
const int charCode = stoi(charHexCode, nullptr, 16);
char buffer[10] = {};
encodingConverter.convert(charCode, (unsigned char*)buffer, sizeof(buffer));
replaceAll(s, '=' + charHexCode, buffer);
}
void replaceAll(string& s, const string& replaceFrom, const string& replaceTo)
{
size_t needlePosition = -1;
while (true)
{
needlePosition = s.find(replaceFrom, needlePosition + 1);
if (string::npos == needlePosition)
break;
s.replace(needlePosition, replaceFrom.length(), replaceTo);
}
}
string decodeFromUtf8()
{
if('B' == toupper(encoding))
{
return decodeFromBase64();
}
else // if Q:
{
return decodeFromQuatedPrintable();
}
}
string decodeFromBase64()
{
istringstream is(extractedEncodedSubjectToConvert);
Poco::Base64Decoder e64(is);
extractedEncodedSubjectToConvert.clear();
string buffer;
while(getline(e64, buffer))
extractedEncodedSubjectToConvert += buffer;
return extractedEncodedSubjectToConvert;
}
string decodeFromQuatedPrintable()
{
replaceAll(extractedEncodedSubjectToConvert, "_", " ");
istringstream is(extractedEncodedSubjectToConvert);
Poco::Net::QuotedPrintableDecoder qp(is);
extractedEncodedSubjectToConvert.clear();
string buffer;
while(getline(qp, buffer))
extractedEncodedSubjectToConvert += buffer;
return extractedEncodedSubjectToConvert;
}
private:
string charset;
char encoding;
string extractedEncodedSubjectToConvert;
bool isStringEncoded;
static constexpr const char* sequenceBeginEncodedText = "=?";
static constexpr const char* sequenceEndEncodedText = "?=";
};
int main()
{
Poco::Net::POP3ClientSession session("poczta.o2.pl");
session.login("my mail", "my password");
Poco::Net::POP3ClientSession::MessageInfoVec messages;
session.listMessages(messages);
Poco::Net::MessageHeader header;
Poco::Net::MailMessage message;
auto currentMessage = messages[0];
session.retrieveHeader(currentMessage.id, header);
session.retrieveMessage(currentMessage.id, message);
const string subject = message.getSubject();
Encoder encoder(subject);
cout << "Original subject: " << subject << endl;
cout << "Encoded: " << encoder.convert() << endl;
}