-2

I was trying to load this json data in hive

{
    "id": "0001",
    "type": "donut",
     "name": "Cake",
     "ppu": 0.55,
     "batters":
         {
             "batter":
                 [
                     { "id": "1001", "type": "Regular" },
                     { "id": "1002", "type": "Chocolate" },
                    { "id": "1003", "type": "Blueberry" },
                    { "id": "1004", "type": "Devil's Food" }
                ]
        },
    "topping":
        [
            { "id": "5001", "type": "None" },
            { "id": "5002", "type": "Glazed" },
            { "id": "5005", "type": "Sugar" },
            { "id": "5007", "type": "Powdered Sugar" },
            { "id": "5006", "type": "Chocolate with Sprinkles" },
            { "id": "5003", "type": "Chocolate" },
            { "id": "5004", "type": "Maple" }
        ]
}

using DDL commands

ADD JAR /home/cloudera/Downloads/json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar;

CREATE EXTERNAL TABLE format.json_serde (
  `id` string,
  `type` string,
  `name` string,
 `ppu` float,       
  batters` struct < `batter`:array < struct <`bid`:string, `btype`:string >>>,
  `topping`:array < struct<`tid`:int, `ttype`:string>>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';

is throwing me error

FAILED: ParseException line 7:11 cannot recognize input near ':' 'array' '<' in column type </b>
dtolnay
  • 9,621
  • 5
  • 41
  • 62
unchained
  • 19
  • 6

2 Answers2

0
  1. You got typos
    ttype`:string should be ttype:string
    battersstruct should be batters struct
    topping:array should be topping array

  2. JSON SerDe mapping is done by name.
    Your structs fields names should match the actual names, e.g. id and not bid or tid, otherwise you'll get NULL values for these fields.

  3. There is already a JSON SerDe whicg is part of the Hive installation. https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-RowFormats&SerDe


create external table json_serde 
( 
    id      string
   ,type    string 
   ,name    string 
   ,ppu     float
   ,batters struct<batter:array<struct<id:string,type:string>>>
   ,topping array<struct<id:string,type:string>>
) 
row format serde 
'org.apache.hive.hcatalog.data.JsonSerDe' 
stored as textfile
;

select * from json_serde
;

+------+-------+------+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|  id  | type  | name |        ppu        |                                                                     batters                                                                      |                                                                                                                  topping                                                                                                                  |
+------+-------+------+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 0001 | donut | Cake | 0.550000011920929 | {"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"},{"id":"1003","type":"Blueberry"},{"id":"1004","type":"Devil'sFood"}]} | [{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5007","type":"PowderedSugar"},{"id":"5006","type":"ChocolatewithSprinkles"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}] |
+------+-------+------+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
David דודו Markovitz
  • 42,900
  • 6
  • 64
  • 88
  • Thanks, when i tried to query it returned folllowing error **Failed with exception java.io.IOException:org.apache.hadoop.hive.serde2.SerDeException: org.codehaus.jackson.JsonParseException: Unexpected end-of-input: expected close marker for OBJECT (from [Source: java.io.ByteArrayInputStream@73304204; line: 1, column: 0]) at [Source: java.io.ByteArrayInputStream@73304204; line: 1, column: 3]** Please let me know if you performed any formatting on the input data – unchained Apr 27 '17 at 03:50
  • Any JSON document should written in a single line. I'm not aware of any JSON SerDe that knows how to deal with multi-lines document. – David דודו Markovitz Apr 27 '17 at 05:02
0

It worked when i removed the semicolons near topping. Thanks


CREATE EXTERNAL TABLE format.json_serde (
id string,
type string,
name string,
ppu float,

batters struct<batter:array< struct<bid:string, btype:string >>>,


topping array< struct<tid:string, ttype:string>> )

unchained
  • 19
  • 6