0

I have a method that reads a JSON file and returns a const char* that can contain any text, including emojis. I don't have access to the source of this method.

For example, I created a json file with the england flag, ({message: "\uD83C\uDFF4\uDB40\uDC67\uDB40\uDC62\uDB40\uDC65\uDB40\uDC6E\uDB40\uDC67\uDB40\uDC7F"}).

When I call that method, it returns something like 🏴󠁧󠁢󠁥󠁮󠁧󠁿, but in order to use it properly, I need to convert it to icu::UnicodeString because I use another method (closed source again) that expects it.

The only way I found to make it work was something like:

// Build an ICU string from the UTF-16 data produced by convertMessage().
icu::UnicodeString unicode;
// NOTE(review): the C-style cast presumably reinterprets the char16_t buffer returned by
// convertMessage() as UChar*, and the data is presumably NUL-terminated — confirm both.
unicode.setTo((UChar*)convertMessage().data());
// Re-encode the ICU string as UTF-8 into a std::string.
std::string messageAsString;
unicode.toUTF8String(messageAsString);

after doing that, messageAsString is usable and everything works.

convertMessage() is a method that uses std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>::from_bytes(str).

My question is: is there a way to create an icu::UnicodeString without using that extra convertMessage() call?

user3900456
  • 2,003
  • 3
  • 25
  • 33
  • If it works, why don't you want to use std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>::from_bytes(str)? – idris Feb 12 '20 at 06:41
  • because VC++ keeps complaining, then I need to define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING and then it would be one thing less to depend on – user3900456 Feb 12 '20 at 06:59
  • ICU has the ucnv_toUChars function for conversion. You can use it to create a UnicodeString from a UTF-8 string. Let me know if you need help using it. – idris Feb 12 '20 at 07:02
  • thanks! I've found this code `int convert_from_encoding(char* src, int srclen, UChar* dst, int dstlen, char* charset) { UConverter* conv = NULL; UErrorCode status = U_ZERO_ERROR; int32_t len = 0; conv = ucnv_open(charset, &status); if (status == U_ZERO_ERROR) { len = ucnv_toUChars(conv, dst, dstlen, src, srclen, &status); if (status != U_ZERO_ERROR) fprintf(stderr, "encoding: %s\n", u_errorName(status)); len *= sizeof(UChar); ucnv_close(conv); } return len; }` but I'm not sure how to use it... – user3900456 Feb 12 '20 at 07:18

1 Answers1

1

This is a sample usage of the ucnv_toUChars function. I took these functions from the PostgreSQL source code and used them in my project.

// Converter shared by icu_to_uchar() and icu_from_uchar(). It must be opened
// (main() does so with ucnv_open("utf8", ...)) before either helper is called.
UConverter *icu_converter;

  /**
   * Convert a byte string to UTF-16 using the global icu_converter.
   *
   * @param buff_uchar  Out: on success, receives a malloc()'ed buffer holding the
   *                    converted text plus a terminating NUL; the caller must free() it.
   *                    Set to NULL on failure.
   * @param buff        Input bytes in the converter's encoding.
   * @param nbytes      Number of bytes to read from buff.
   * @return Number of UChars written (terminator excluded), or -1 on conversion or
   *         allocation failure.
   */
  static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, int32_t nbytes)
{
    *buff_uchar = NULL;

    // Preflight: with a zero-capacity destination ICU only reports the required
    // length, signalling U_BUFFER_OVERFLOW_ERROR, which is expected here.
    UErrorCode status = U_ZERO_ERROR;
    const int32_t needed = ucnv_toUChars(icu_converter, NULL, 0, buff, nbytes, &status);
    if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
        return -1;

    // One extra slot so ICU can NUL-terminate the output.
    UChar *out = static_cast<UChar *>(malloc((static_cast<size_t>(needed) + 1) * sizeof(UChar)));
    if (out == NULL)
        return -1;

    status = U_ZERO_ERROR;
    const int32_t written = ucnv_toUChars(icu_converter, out, needed + 1, buff, nbytes, &status);
    if (U_FAILURE(status)) {
        // Report the failure instead of relying on assert(), which is compiled
        // out under NDEBUG and would leak the buffer.
        free(out);
        return -1;
    }

    *buff_uchar = out;
    return written;
}
/**
 * Convert UTF-16 text to the global icu_converter's encoding.
 *
 * @param result      Out: on success, receives a malloc()'ed buffer holding the
 *                    converted bytes plus a terminating NUL; the caller must free() it.
 *                    Set to NULL on failure.
 * @param buff_uchar  Input UTF-16 code units.
 * @param len_uchar   Number of UChars to read from buff_uchar.
 * @return Number of bytes written (terminator excluded), or -1 on conversion or
 *         allocation failure.
 */
static int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
{
    *result = NULL;

    // Preflight: with a zero-capacity destination ICU only reports the required
    // length, signalling U_BUFFER_OVERFLOW_ERROR, which is expected here.
    UErrorCode status = U_ZERO_ERROR;
    const int32_t needed = ucnv_fromUChars(icu_converter, NULL, 0,
        buff_uchar, len_uchar, &status);
    if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
        return -1;

    // One extra byte so ICU can NUL-terminate the output.
    char *out = static_cast<char *>(malloc(static_cast<size_t>(needed) + 1));
    if (out == NULL)
        return -1;

    status = U_ZERO_ERROR;
    const int32_t written = ucnv_fromUChars(icu_converter, out, needed + 1,
        buff_uchar, len_uchar, &status);
    if (U_FAILURE(status)) {
        // Report the failure instead of relying on assert(), which is compiled
        // out under NDEBUG and would leak the buffer.
        free(out);
        return -1;
    }

    *result = out;
    return written;
}

void main() {

    const char *utf8String = "Hello";
    int len = 5;
    UErrorCode  status = U_ZERO_ERROR;

    icu_converter = ucnv_open("utf8", &status);
    assert(status <= U_ZERO_ERROR);

    UChar      *buff_uchar;
    int32_t len_uchar = icu_to_uchar(&buff_uchar, ut8String, len);

    // use buff_uchar

    free(buff_uchar);
}
idris
  • 488
  • 3
  • 6