0

I need to parse the complex JSON below in Scala to get the values of "expression" and "value" under the "measures" key, i.e. I need List(COUNT, COUNT_DISTINCT, ...) and List(1, tbl1.USER_ID, ...).

I tried multiple options, but none of them worked. Any help is appreciated.

{
  "uuid": "uuidddd",
  "last_modified": 1559080222953,
  "version": "2.6.1.0",
  "name": "FULL_DAY_2_mand_date",
  "is_draft": false,
  "model_name": "FULL_DAY_1_may05",
  "description": "",
  "null_string": null,
  "dimensions": [
    {
      "name": "PLATFORM",
      "table": "tbl1",
      "column": "PLATFORM",
      "derived": null
    },
    {
      "name": "OS_VERSION",
      "table": "tbl1",
      "column": "OS_VERSION",
      "derived": null
    }
  ],
  "measures": [
    {
      "name": "_COUNT_",
      "function": {
        "expression": "COUNT",
        "parameter": {
          "type": "constant",
          "value": "1"
        },
        "returntype": "bigint"
      }
    },
    {
      "name": "UU",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "tbl1.USER_ID"
        },
        "returntype": "hllc(12)"
      }
    },
    {
      "name": "CONT_SIZE",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "tbl1.SIZE"
        },
        "returntype": "bigint"
      }
    },
    {
      "name": "CONT_COUNT",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "tbl1.COUNT"
        },
        "returntype": "bigint"
      }
    }
  ],
  "dictionaries": [],
  "rowkey": {
    "rowkey_columns": [
      {
        "column": "tbl1.OS_VERSION",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "tbl1.PLATFORM",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "tbl1.DEVICE_FAMILY",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      }
    ]
  },
  "hbase_mapping": {
    "column_family": [
      {
        "name": "F1",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "_COUNT_",
              "CONT_SIZE",
              "CONT_COUNT"
            ]
          }
        ]
      },
      {
        "name": "F2",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "UU"
            ]
          }
        ]
      }
    ]
  },
  "aggregation_groups": [
    {
      "includes": [
        "tbl1.PLATFORM",
        "tbl1.OS_VERSION"
      ],
      "select_rule": {
        "hierarchy_dims": [],
        "mandatory_dims": [
          "tbl1.DATE_HR"
        ],
        "joint_dims": []
      }
    }
  ],
  "signature": "ttrrs==",
  "notify_list": [],
  "status_need_notify": [
    "ERROR",
    "DISCARDED",
    "SUCCEED"
  ],
  "partition_date_start": 0,
  "partition_date_end": 3153600000000,
  "auto_merge_time_ranges": [
    604800000,
    2419200000
  ],
  "volatile_range": 0,
  "retention_range": 0,
  "engine_type": 4,
  "storage_type": 2,
  "override_kylin_properties": {
    "job.queuename": "root.production.P0",
    "is-mandatory-only-valid": "true"
  },
  "cuboid_black_list": [],
  "parent_forward": 3,
  "mandatory_dimension_set_list": [],
  "snapshot_table_desc_list": []
}

This is a snippet of the code I tried, and it is giving a null list

import org.json4s._
import org.json4s.jackson.JsonMethods._

// Formats instance picked up implicitly by json4s `extract`. The explicit type
// annotation keeps implicit resolution predictable and is required for
// non-local implicit definitions in Scala 3.
implicit val formats: Formats = org.json4s.DefaultFormats

/** Aggregate function of a measure: mirrors the `function` object of each `measures` entry. */
case class Function(expression: String, parameter: Parameter, returntype: String)

/** Argument of a [[Function]]; `type` is back-quoted because it is a Scala keyword. */
case class Parameter(`type`: String, value: String)

/** One named measure together with the function that computes it. */
case class Measures(name: String, function: Function)

// NOTE(review): the JSON document above uses the top-level key "measures", but this
// field is named `measure`. Field names are matched against JSON keys during
// extraction, so this list is presumably never populated — rename it to `measures`.
case class AllMeasuresData(uuid: String, measure: List[Measures])

// Parse the JSON text held in `tmp` (defined elsewhere; presumably the document shown
// above) and extract it into the case-class model.
val data = parse(tmp).extract[AllMeasuresData]
// Collect the `name` of every extracted measure.
val names = data.measure.map(_.name)

println(names)

// NOTE(review): the lines below repeat the snippet above verbatim.
case class AllMeasuresData(uuid: String, measure: List[Measures])

val data = parse(tmp).extract[AllMeasuresData]
val names = data.measure.map(_.name)

println(names)
CSUNNY
  • 414
  • 1
  • 7
  • 23

2 Answers

0

Your Parameter class does not match the JSON because you have used type1 rather than type as the field name. Use backticks to use "type" as a field name even though it is a reserved word:

/** Function argument; `type` is back-quoted so the reserved word can be used as a field name. */
case class Parameter(`type`: String, value: String)

You also need to change the Function class as it has returntype1 rather than returntype:

/** Aggregate function of a measure; field names match the JSON keys exactly. */
case class Function(expression: String, parameter: Parameter, returntype: String)

The names of the fields in Scala must exactly match the names of the fields in the JSON. Extra fields in the JSON are ignored, but all the fields in the Scala must have matching fields in the JSON. If there are optional fields in the JSON then the Scala field type should be Option[...].

Tim
  • 26,753
  • 2
  • 16
  • 29
  • Updated the changes, still coming as null. – CSUNNY May 31 '19 at 19:55
  • You need to fix all the Scala field names so that they match the JSON field names. – Tim Jun 01 '19 at 06:35
  • I fixed the returntype as well. Still coming as null. The reason there were some typos earlier was because I was trying lot of stuff. Sorry for some of the confusion – CSUNNY Jun 01 '19 at 15:58
  • Start with a minimal version and add fields until it breaks to work out which is wrong. Or make them all `Option` and any that result in `None` are not specified correctly. – Tim Jun 01 '19 at 17:17
0

There are a couple of typos in your ADT.

Here is what you need:

  /** Aggregate function of a measure: mirrors the `function` object of each `measures` entry. */
  case class Function(expression: String, parameter: Parameter, returntype: String)

  /** Argument of a [[Function]]; `type` is back-quoted because it is a Scala keyword. */
  case class Parameter(`type`: String, value: String)

  /** One named measure together with the function that computes it. */
  case class Measures(name: String, function: Function)

  /** Top-level projection of the document: only `uuid` and the `measures` array are modelled. */
  case class AllMeasuresData(uuid: String, measures: List[Measures])

There is also an extra comma in the JSON; here is the corrected version:

  {
   "uuid":"uuidddd",
   "last_modified":1559080222953,
   "version":"2.6.1.0",
   "name":"FULL_DAY_2_mand_date",
   "is_draft":false,
   "model_name":"FULL_DAY_1_may05",
   "description":"",
   "null_string":null,
   "dimensions":[
      {
         "name":"PLATFORM",
         "table":"tbl1",
         "column":"PLATFORM",
         "derived":null
      },
      {
         "name":"OS_VERSION",
         "table":"tbl1",
         "column":"OS_VERSION",
         "derived":null
      } // There was an extra trailing comma here
   ],
   "measures":[
      {
         "name":"_COUNT_",
         "function":{
            "expression":"COUNT",
            "parameter":{
               "type":"constant",
               "value":"1"
            },
            "returntype":"bigint"
         }
      },
      {
         "name":"UU",
         "function":{
            "expression":"COUNT_DISTINCT",
            "parameter":{
               "type":"column",
               "value":"tbl1.USER_ID"
            },
            "returntype":"hllc(12)"
         }
      },
      {
         "name":"CONT_SIZE",
         "function":{
            "expression":"SUM",
            "parameter":{
               "type":"column",
               "value":"tbl1.SIZE"
            },
            "returntype":"bigint"
         }
      },
      {
         "name":"CONT_COUNT",
         "function":{
            "expression":"SUM",
            "parameter":{
               "type":"column",
               "value":"tbl1.COUNT"
            },
            "returntype":"bigint"
         }
      }
   ],
   "dictionaries":[

   ],
   "rowkey":{
      "rowkey_columns":[
         {
            "column":"tbl1.OS_VERSION",
            "encoding":"dict",
            "encoding_version":1,
            "isShardBy":false
         },
         {
            "column":"tbl1.PLATFORM",
            "encoding":"dict",
            "encoding_version":1,
            "isShardBy":false
         },
         {
            "column":"tbl1.DEVICE_FAMILY",
            "encoding":"dict",
            "encoding_version":1,
            "isShardBy":false
         }
      ]
   },
   "hbase_mapping":{
      "column_family":[
         {
            "name":"F1",
            "columns":[
               {
                  "qualifier":"M",
                  "measure_refs":[
                     "_COUNT_",
                     "CONT_SIZE",
                     "CONT_COUNT"
                  ]
               }
            ]
         },
         {
            "name":"F2",
            "columns":[
               {
                  "qualifier":"M",
                  "measure_refs":[
                     "UU"
                  ]
               }
            ]
         }
      ]
   },
   "aggregation_groups":[
      {
         "includes":[
            "tbl1.PLATFORM",
            "tbl1.OS_VERSION"
         ],
         "select_rule":{
            "hierarchy_dims":[

            ],
            "mandatory_dims":[
               "tbl1.DATE_HR"
            ],
            "joint_dims":[

            ]
         }
      }
   ],
   "signature":"ttrrs==",
   "notify_list":[

   ],
   "status_need_notify":[
      "ERROR",
      "DISCARDED",
      "SUCCEED"
   ],
   "partition_date_start":0,
   "partition_date_end":3153600000000,
   "auto_merge_time_ranges":[
      604800000,
      2419200000
   ],
   "volatile_range":0,
   "retention_range":0,
   "engine_type":4,
   "storage_type":2,
   "override_kylin_properties":{
      "job.queuename":"root.production.P0",
      "is-mandatory-only-valid":"true"
   },
   "cuboid_black_list":[

   ],
   "parent_forward":3,
   "mandatory_dimension_set_list":[

   ],
   "snapshot_table_desc_list":[

   ]
}

Now you can run:

  // Deserialize the JSON text into the case-class model, then list each measure's name.
  val data = parse(tmp).extract[AllMeasuresData]
  val names = data.measures.map(measure => measure.name)

  println(names)
// Displays
// List(_COUNT_, UU, CONT_SIZE, CONT_COUNT)
Valy Dia
  • 2,781
  • 2
  • 12
  • 32
  • Updated the changes. Still coming as null. Updated the changes to the main problem statement as well – CSUNNY May 31 '19 at 19:55
  • I can still see *returntype1* (instead of **returntype**) and *measure* (instead of **measures**) in your update... Have these changes been applied as well? – Valy Dia May 31 '19 at 20:26
  • I did that also. Still coming as null. The reason there are some typos earlier was because I was trying lot of stuff. Sorry for some of the confusion – CSUNNY Jun 01 '19 at 15:57
  • Actually this worked. I did not see the change in 'measures' – CSUNNY Jun 01 '19 at 16:19