0

here is my hive table

-- One column: an outer map whose values are inner maps of (student_id, age) structs.
CREATE TABLE IF NOT EXISTS dumdum (
    val MAP<STRING, MAP<STRING, STRUCT<student_id:STRING, age:INT>>>
);

-- Two sample rows, each holding a single outer key and a single inner key.
INSERT INTO dumdum
SELECT map('A', map('1', named_struct('student_id', '123a', 'age', 11)));
INSERT INTO dumdum
SELECT map('B', map('2', named_struct('student_id', '987z', 'age', 11)));

-- Show the stored rows.
SELECT * FROM dumdum;

and i see

{"A":{"1":{"student_id":"123a","age":11}}}
{"B":{"2":{"student_id":"987z","age":11}}}

I want to extract all the student_id values from the inner maps, i.e. 123a and 987z. Here is what I want to do:

select some_udf(val) from dumdum;

and the result should be

["123a","987z"]

Here is the Java UDF i wrote

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;

import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Hive GenericUDF that takes a single map argument whose values are themselves
 * maps, and declares a list of strings as its result type.
 *
 * NOTE(review): as posted, {@code evaluate} never adds anything to the result
 * list (the only {@code dataList.add} call is inside a comment), and the
 * identifier {@code innerKey} is used while only {@code innerkey} is declared,
 * so the class does not compile as shown.
 */
public class CustomUDF extends GenericUDF {
    // Inspector for the UDF's single argument (the outer map); set in initialize.
    private MapObjectInspector  inputMapOI                        = null;

    // Converts an outer-map key to a Java String; built from the outer map's key inspector.
    private Converter           inputMapKeyConverter              = null;

    // Inspector for the outer map's values, which initialize requires to be maps.
    private MapObjectInspector inputMapValueMapOI               = null;
    // Converts an inner-map value to a standard struct with fields (student_id, age).
    private Converter inputMapValueConverter;

    /** Returns the fixed display name "my udf"; the arguments are ignored. */
    @Override
    public String getDisplayString(String[] arguments) {
        return "my udf";
    }

    /**
     * Validates the argument inspectors and prepares the inspectors/converters
     * used by {@code evaluate}.
     *
     * @param arguments inspectors for the call's arguments; exactly one is expected
     * @return a standard list inspector whose elements are Java strings
     * @throws UDFArgumentLengthException if the argument array is null or not of length 1
     * @throws UDFArgumentException if the argument is not a map, or its values are not maps
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if ((null == arguments) || (arguments.length != 1)) {
            throw new UDFArgumentLengthException("1 arguments are expected.");
        }

        if (!(arguments[0] instanceof MapObjectInspector)) {
            throw new UDFArgumentException("The first parameter should be a map object ");
        }

        inputMapOI = (MapObjectInspector) arguments[0];

        // Converter from whatever key type the outer map uses to a plain Java String.
        ObjectInspector mapKeyOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
        inputMapKeyConverter = ObjectInspectorConverters.getConverter(this.inputMapOI.getMapKeyObjectInspector(), mapKeyOI);

        if (!(inputMapOI.getMapValueObjectInspector() instanceof MapObjectInspector)) {
            throw new UDFArgumentException("The map value type must be a map ");
        }
        inputMapValueMapOI = (MapObjectInspector) this.inputMapOI.getMapValueObjectInspector();

        // Describe the target struct layout: (student_id: string, age: int).
        List<String> structFieldNames = new ArrayList<String>();

        structFieldNames.add("student_id");
        structFieldNames.add("age");

        List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>();
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);

        // Converter from the inner map's value inspector to the standard struct above.
        ObjectInspector inputMapElementOI = inputMapValueMapOI.getMapValueObjectInspector();
        ObjectInspector outputMapElementOI = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);

        inputMapValueConverter = ObjectInspectorConverters.getConverter(inputMapElementOI, outputMapElementOI);

        return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }

    /**
     * Walks the outer map and, for each entry, its inner map.
     *
     * @param arguments the deferred call arguments; exactly one is expected
     * @return the list {@code dataList}; nothing is ever added to it in the code as posted
     * @throws UDFArgumentLengthException if the argument array is null or not of length 1
     * @throws HiveException declared by the overridden method
     */
    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        if ((null == arguments) || (arguments.length != 1)) {
            throw new UDFArgumentLengthException("1 argument is expected.");
        }

        // NOTE(review): map is not null-checked before keySet(); presumably getMap
        // returns null for a NULL column value -- confirm.
        Map<?, ?> map = inputMapOI.getMap(arguments[0].get());
        List<String> dataList = new ArrayList<String>();
        for (Object key : map.keySet()) {
            // Inner map stored under this outer key.
            Map<?, ?> valueMap = this.inputMapValueMapOI.getMap(map.get(key));
            if ((valueMap == null) || (valueMap.size() == 0)) {
                continue;
            }

            // Despite its name, `value` is a key of the inner map here.
            for (Object value : valueMap.keySet()) {
                try{
                    // NOTE(review): inputMapKeyConverter was built from the OUTER map's
                    // key inspector but is applied to an inner-map key.
                    String innerkey = (String) this.inputMapKeyConverter.convert(value);
                    // NOTE(review): `innerKey` is not declared (the local is `innerkey`).
                    System.out.println("Got "+innerKey);
                    // NOTE(review): the lookup uses the outer-loop `key`, not the inner key
                    // `value`, so it looks up an outer key in the inner map. The result is
                    // also read with the map inspector, although initialize treats the
                    // inner map's values as structs -- this looks like the reason the
                    // "Got null" branch is always taken; confirm.
                    Map<?, ?> innerMap = (Map<?, ?>) this.inputMapValueMapOI.getMap(valueMap.get(key));
                    if ((innerMap == null) || (innerMap.size() == 0)) {
                        System.out.println("Got null");
                        continue;
                    }
                    for (Object struct : innerMap.keySet()) {
                    // NOTE(review): the struct converter is applied to a map key and the
                    // result is cast to String.
                    String strValue = (String) this.inputMapValueConverter.convert(struct);
                    
                    // NOTE(review): the converter targets a standard struct inspector; casting
                    // its result to StructField (field metadata) is presumably wrong -- confirm.
                    StructField str = (StructField) inputMapValueConverter.convert(innerMap.get(strValue));
                    /*
                    Not sure what to do here. Maybe 
                    str.getFieldID();
                    dataList.add(str.toString()); 
                    */
                    
                }
                }
                // A failed cast is only reported on stdout; the loop moves on to the next inner key.
                catch (ClassCastException c){
                    System.out.println("Got ClassCastException");
                }
            }
        }

        return dataList;
    }
}

when i invoke it as

add jar /path/to/my/jar;
CREATE TEMPORARY FUNCTION modudf AS 'some.package.CustomUDF';
select modudf(val) from dumdum;

i never get past

Map<?, ?> innerMap = (Map<?, ?>) this.inputMapValueMapOI.getMap(valueMap.get(inner));
                        if ((innerMap == null) || (innerMap.size() == 0)) {
                            System.out.println("Got null");
                            continue;
                        }

I can see the output of

System.out.println("Got "+innerKey);

on the console.

Why can't my converter access the inner map?

Also, how will I dereference the StructField once I am able to access the inner map?

Update

Thanks serge_k for the suggestion. I'm afraid I still need one converter, or else I won't be able to get the key. Here is what I tried.

First i defined the second map inspector and the struct inspector as

inputMapValueMapOI = (MapObjectInspector) this.inputMapOI.getMapValueObjectInspector();

        List<String> structFieldNames = new ArrayList<String>();

        structFieldNames.add("student_id");
        structFieldNames.add("age");

        List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>();
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);

        structOI = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);

then here is what i tried

                    String innerkey = (String) inputMapKeyConverter.convert(value);
                    System.out.println(innerKey);
                    Map<?, ?> innerMap = (Map<?, ?>) this.inputMapValueMapOI.getMap(valueMap.get(innerkey));
                    if ((innerMap == null) || (innerMap.size() == 0)) {
                        System.out.println("null inner map");
                        continue;
                    }
                    
                    for (Object struct : innerMap.keySet()) {
                        String ikey = (String) inputMapKeyConverter.convert(struct);
                        Object obj = structOI.getStructFieldData(innerMap.get(ikey), structOI.getStructFieldRef("student_id"));
                        dataList.add(obj.toString());
                    }

but i still see

null inner map

Have I not defined the inner map inspector properly?

AbtPst
  • 7,778
  • 17
  • 91
  • 172
  • Try to convert map keys to standard keys (in the update) – serge_k Jul 01 '20 at 20:23
  • On second thought, try to iterate through `entrySet` instead of `keySet`; it looks like you don't need the keys. Also check the map size with `inputMapOI.getMapSize(arguments[0].get());` – serge_k Jul 01 '20 at 21:06

1 Answers1

1

I would recommend not using converters. Just define a second MapObjectInspector for the inner map, get the outer map's value, and call getMap on it as you did for the first map. To get the struct values, you need to define a variable of type StructObjectInspector in initialize, e.g.

StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors)

then

Object obj = soi.getStructFieldData(innerMapValue, soi.getStructFieldRef("student_id"))

Update: Try to convert map keys to standard keys as follows

/**
 * Builds a lookup from each key's primitive Java form to the key object
 * actually stored in {@code inspectMap}.
 *
 * The Java form is obtained through the key inspector of {@code mapInspector},
 * which is cast to a PrimitiveObjectInspector.
 *
 * @param inspectMap the map whose keys are to be indexed
 * @return a new map from primitive Java key to the original key object
 */
private Map stdKeys(Map inspectMap) {
    Map rawKeyByJavaKey = new HashMap();
    for (Object rawKey : inspectMap.keySet()) {
        // Looked up per key, so an empty map never touches the inspector.
        PrimitiveObjectInspector keyInspector =
                (PrimitiveObjectInspector) mapInspector.getMapKeyObjectInspector();
        rawKeyByJavaKey.put(keyInspector.getPrimitiveJavaObject(rawKey), rawKey);
    }
    return rawKeyByJavaKey;
}

See https://github.com/klout/brickhouse/blob/master/src/main/java/brickhouse/udf/collect/MapRemoveKeysUDF.java for more details

serge_k
  • 1,772
  • 2
  • 15
  • 21
  • thanks serge_k, please see my update. i still cant access the inner map. i need at least 1 converter to be able to get the keys as strings. however, i also created a second inspector for the inner map but still i am unable to extract the inner map – AbtPst Jul 01 '20 at 17:54