3

I'm writing a module for a complicated application, and my module has to process a JSON response returned by a web server. My question is: how can I decode this kind of text?

\u041f\u043e\u0438\u0441\u043a \u043f\u043e \u0444\u0430\u043c\u0438\u043b\u0438\u0438, \u0438\u043c\u0435\u043d\u0438 (\u043e\u0442\u0447\u0435\u0441\u0442\u0432\u0443

It's Cyrillic text, and Mozilla Firefox displays it as it should be. How can I process it? I'm on Delphi 2010.

kseen
  • 359
  • 8
  • 56
  • 104
  • Your JSON-decoding library should already handle that for you. What library are you using? Also, that's not valid JSON; are you sure that's not actually inside a string? – Rob Kennedy Mar 15 '12 at 04:06
  • I don't get this using some JSON library. I just get it using "raw" HTTP GET queries. – kseen Mar 15 '12 at 04:10
  • I understand that, but if what you're receiving is supposed to be JSON-encoded, as you say it is, then you should use a JSON-decoding library to process it instead of writing your own. – Rob Kennedy Mar 15 '12 at 04:18
  • Yep, it is so. RRUZ below proposed good library for that. – kseen Mar 15 '12 at 05:11
  • 1
    This is not UTF-8, but string containing escaped UTF-16 code points. – OnTheFly Mar 15 '12 at 07:49
  • 1
    I see backslashes, spaces, numbers, Latin letters, a comma, and a parenthesis, @User. All are UTF-8 code points. – Rob Kennedy Mar 15 '12 at 18:08
  • @RobK, you are probably looking at something else. Consider reading the specification - http://tools.ietf.org/html/rfc4627 – OnTheFly Mar 15 '12 at 18:47
  • I'm looking at the second paragraph of this question, @User. The one that starts out `\u041f\u043e`. It clearly qualifies as UTF-8 text. It's a string of UTF-8 code points representing escape sequences for UTF-16 code points. The default encoding for JSON text is UTF-8, after all. – Rob Kennedy Mar 15 '12 at 20:38
  • @RobKennedy, heh, yes, encoding of JSON itself shall be Unicode and can be determined by examining null pattern. But check out Windows character map for quoted code points and compare found glyphs to the screenshot in the RRUZ's answer :-) – OnTheFly Mar 15 '12 at 21:08

3 Answers3

15

You can use the DBXJSON unit which is included in Delphi 2010

uses
 DBXJSON;

const
JsonUt8  ='"\u041f\u043e\u0438\u0441\u043a \u043f\u043e \u0444\u0430\u043c\u0438\u043b\u0438\u0438, \u0438\u043c\u0435\u043d\u0438 (\u043e\u0442\u0447\u0435\u0441\u0442\u0432\u0443"';

// Parses the JSON text held in JsonUt8 and shows the parsed value's string
// form in Edit1.
procedure TForm59.Button1Click(Sender: TObject);
var
  LJSONValue: TJSONValue;
begin
  LJSONValue := TJSONObject.ParseJSONValue(TEncoding.UTF8.GetBytes(JsonUt8), 0);
  // Guard against a nil result so a parse failure surfaces as a clear error
  // instead of an access violation on the next line.
  if LJSONValue = nil then
    raise Exception.Create('JsonUt8 does not contain valid JSON');
  try
    Edit1.Text := LJSONValue.ToString;
  finally
    // The caller owns the parsed value; release it to avoid a leak.
    LJSONValue.Free;
  end;
end;

enter image description here

RRUZ
  • 134,889
  • 20
  • 356
  • 483
2

OK guys, here is the complete code that solved this issue for me:

// Decodes backslash escape sequences in s and returns the result as a
// Unicode string.
//
// Supported escapes are the ones defined for JSON strings:
//   \"  \\  \/  \b  \f  \n  \r  \t  and  \uXXXX (four hex digits, one UTF-16
//   code unit; a surrogate pair is written as two consecutive \u escapes).
// Every input character that is not part of an escape is widened as-is.
//
// Raises Exception when the input ends right after a backslash, or when a
// backslash is followed by an unknown or malformed escape.
function Unescape(const s: AnsiString): string;
var
  i: Integer; // read position in s
  j: Integer; // write position in Result
  c: Integer; // numeric value of a \uXXXX escape
begin
  // Every escape produces exactly one character from two or six input
  // characters, so the output is never longer than the input.
  SetLength(Result, Length(s));
  i := 1;
  j := 1;
  while i <= Length(s) do begin
    if s[i] <> '\' then begin
      Result[j] := WideChar(s[i]);
      Inc(i);
    end else begin
      if i >= Length(s) then
        raise Exception.Create('Unexpected end of string');
      case s[i + 1] of
        // characters that simply stand for themselves
        '\', '"', '/':
          begin
            Result[j] := WideChar(s[i + 1]);
            Inc(i, 2);
          end;
        'b':
          begin
            Result[j] := #8;
            Inc(i, 2);
          end;
        'f':
          begin
            Result[j] := #12;
            Inc(i, 2);
          end;
        'n':
          begin
            Result[j] := #10;
            Inc(i, 2);
          end;
        'r':
          begin
            Result[j] := #13;
            Inc(i, 2);
          end;
        't':
          begin
            Result[j] := #9;
            Inc(i, 2);
          end;
        'u':
          // the escape occupies s[i]..s[i + 5]; convert the hex number
          if (i + 5 <= Length(s))
             and TryStrToInt('$' + string(Copy(s, i + 2, 4)), c) then begin
            Result[j] := WideChar(c);
            Inc(i, 6);
          end else
            raise Exception.CreateFmt('Invalid code at position %d', [i]);
      else
        raise Exception.CreateFmt('Invalid code at position %d', [i]);
      end;
    end;
    Inc(j);
  end;

  // Trim the part of the buffer that was not needed
  SetLength(Result, j - 1);
end;

const
  // NORM_FORM value passed to NormalizeString to request
  // Unicode normalization form C (canonical composition).
  NormalizationC = 1;

// Windows API import; see the NormalizeString documentation for the meaning
// of the return value (positive = characters written, <= 0 = failure).
function NormalizeString(NormForm: Integer; lpSrcString: PWideChar; cwSrcLength: Integer;
 lpDstString: PWideChar; cwDstLength: Integer): Integer; stdcall; external 'Normaliz.dll';

// Returns s converted to Unicode normalization form C.
//
// An empty input yields an empty result without calling the API.
// Raises Exception when NormalizeString reports a failure (for example,
// when s contains invalid UTF-16).
function Normalize(const s: string): string;
const
  MaxAttempts = 8; // upper bound on buffer-resize retries
var
  capacity: Integer; // current size of the destination buffer, in characters
  written: Integer;  // return value of the last NormalizeString call
  attempt: Integer;
begin
  Result := '';
  if s = '' then
    Exit;

  // Normalized text can be LONGER than the input (some characters always
  // decompose, even in form C), so ask the API for a buffer size estimate
  // instead of assuming Length(s) is enough.
  written := NormalizeString(NormalizationC, PChar(s), Length(s), nil, 0);
  capacity := written;
  if capacity > 0 then
    for attempt := 1 to MaxAttempts do
    begin
      SetLength(Result, capacity);
      written := NormalizeString(NormalizationC, PChar(s), Length(s),
        PChar(Result), capacity);
      if written > 0 then
      begin
        // success: trim the buffer to the number of characters written
        SetLength(Result, written);
        Exit;
      end;
      // A negative return is either "buffer too small" (its magnitude is a
      // larger suggested size) or an invalid-input error. A magnitude that
      // does not exceed the current buffer cannot be a resize request, so
      // treat it (and 0) as a hard failure.
      if -written <= capacity then
        Break;
      capacity := -written;
    end;

  raise Exception.CreateFmt('NormalizeString failed (returned %d)', [written]);
end;

// Convenience wrapper: first decodes the escape sequences in s with
// Unescape, then passes the decoded text through Normalize.
function UnescapeAndNormalize(const s: AnsiString): string;
var
  decoded: string;
begin
  decoded := Unescape(s);
  Result := Normalize(decoded);
end;

The code is stolen from this answer

Community
  • 1
  • 1
kseen
  • 359
  • 8
  • 56
  • 104
0

In case somebody needs a simple function to decode a JSON-escaped string:

// Decodes the JSON escape sequences found in Source.
//
// Parameters:
//   Source - text to decode. When it both starts and ends with a double
//            quote, the final quote is treated as the string terminator:
//            an escape may not run into it. The surrounding quotes are
//            kept in the result.
//   CRLF   - text substituted for a line break (\n, or the pair \r\n).
//
// Supported escapes: \"  \\  \/  \b  \f  \n  \r  \t  \uXXXX.
// A \r that is directly followed by \n is decoded together with it as a
// single CRLF; a lone \r becomes #13.
//
// Returns the decoded text; an empty Source yields an empty string.
// Raises Exception ('Invalid escape at position N') for an unknown,
// truncated or malformed escape, N being the index of its backslash.
function JSONUnescape(const Source: string; CRLF: string = #13#10): string;
const
  ESCAPE_CHAR = '\';
  QUOTE_CHAR = '"';
  EXCEPTION_FMT = 'Invalid escape at position %d';
var
  ScanPos: Integer;      // current read position in Source
  SegmentStart: Integer; // start of the not-yet-copied literal run
  LastUsable: Integer;   // last index an escape sequence may occupy
  Consumed: Integer;     // length of the escape sequence being decoded
  CodeUnit: Integer;     // numeric value of a \uXXXX escape
  Decoded: string;
begin
  Result := '';
  // Nothing to decode; also avoids indexing Source[1] on an empty string.
  if Source = '' then
    Exit;

  LastUsable := Length(Source);
  if (Source[1] = QUOTE_CHAR) and (Source[Length(Source)] = QUOTE_CHAR) then
    Dec(LastUsable); // the closing quote must not be swallowed by an escape

  SegmentStart := 1;
  ScanPos := 1;
  while ScanPos <= Length(Source) do
  begin
    if Source[ScanPos] <> ESCAPE_CHAR then
    begin
      Inc(ScanPos);
      Continue;
    end;

    // copy the literal text that precedes this escape
    Result := Result + Copy(Source, SegmentStart, ScanPos - SegmentStart);

    // a backslash needs at least one following character
    if ScanPos >= LastUsable then
      raise Exception.CreateFmt(EXCEPTION_FMT, [ScanPos]);

    Consumed := 2;
    case Source[ScanPos + 1] of
      't':
        Decoded := #9;
      'b':
        Decoded := #8;
      'f':
        Decoded := #12;
      'n':
        Decoded := CRLF;
      'r':
        // fold "\r\n" into one line break so the default CRLF does not
        // produce a doubled carriage return
        if (ScanPos + 2 < LastUsable) and (Source[ScanPos + 2] = ESCAPE_CHAR)
          and (Source[ScanPos + 3] = 'n') then
        begin
          Decoded := CRLF;
          Consumed := 4;
        end
        else
          Decoded := #13;
      '\':
        Decoded := '\';
      '"':
        Decoded := '"';
      '/':
        Decoded := '/';
      'u':
        begin
          // the escape occupies ScanPos..ScanPos + 5; malformed hex digits
          // are reported the same way as every other bad escape
          if (ScanPos + 4 < LastUsable)
            and TryStrToInt('$' + Copy(Source, ScanPos + 2, 4), CodeUnit) then
            Decoded := Chr(CodeUnit)
          else
            raise Exception.CreateFmt(EXCEPTION_FMT, [ScanPos]);
          Consumed := 6;
        end;
    else
      raise Exception.CreateFmt(EXCEPTION_FMT, [ScanPos]);
    end;

    Result := Result + Decoded;
    Inc(ScanPos, Consumed);
    SegmentStart := ScanPos;
  end;

  // copy whatever follows the last escape
  Result := Result + Copy(Source, SegmentStart, Length(Source) - SegmentStart + 1);
end;

Usage:

JSONUnescape('\u2764Love Delphi\u2764');
// Returns '❤Love Delphi❤'
JSONUnescape('"\u2764Love\tDelphi\u2764"');
// Returns '"❤Love  Delphi❤"';
JSONUnescape('\\\Invalid escaped text');
// Raises an exception 'Invalid escape at position 3'
Olvin Roght
  • 7,677
  • 2
  • 16
  • 35