0

I've asked this question in different ways a couple of times already. Each time I get a breakthrough I encounter another issue. This is also due to the fact that I am not proficient in Java yet and have difficulty with collections like Maps. So please bear with me.

I have two maps like this:

Map1 -{ORGANIZATION=[Fulton Tax Commissioner 's Office, Grady Hospital, Fulton Health Department], LOCATION=[Bellwood, Alpharetta]}

Map2 - {ORGANIZATION=[Atlanta Police Department, Fulton Tax Commissioner, Fulton Health Department], LOCATION=[Alpharetta], PERSON=[Bellwood, Grady Hospital]}

The maps are defined as : LinkedHashMap<String, List<String>> sampleMap = new LinkedHashMap<String, List<String>>();

I am comparing these two maps by their values; there are only three keys: ORGANIZATION, PERSON and LOCATION. Map1 is my gold set, and I am comparing Map2 against it. The problem I am facing is this: when I iterate over the values of the ORGANIZATION key in Map1 and check for matches in Map2, the first entry of Map1 does have a partial match in Map2 (Fulton Tax Commissioner), but because the first entry of Map2 (Atlanta Police Department) is not a match, I get an incorrect result (I am looking for both exact and partial matches). The result here is the set of true positive, false positive and false negative counters, which ultimately let me calculate precision and recall for Named Entity Recognition.

EDIT

The result I am expecting for this is

Organization: 
True Positive Count = 2
False Negative Count = 1
False Positive Count = 1

Person:
False Positive Count = 2

Location:
True Positive Count = 1
False Negative Count = 1

The output I am currently getting is :

Organization: 
    True Positive Count = 1
    False Negative Count = 2
    False Positive Count = 0

    Person:
    True Positive Count = 0
    False Negative Count = 0
    False Positive Count = 2

    Location:
    True Positive Count = 0
    False Negative Count = 1
    False Positive Count = 0

CODE

private static List<Integer> compareMaps(LinkedHashMap<String, List<String>> annotationMap, LinkedHashMap<String, List<String>> rageMap) 
    {
        List<Integer> compareResults = new ArrayList<Integer>();  

         if (!annotationMap.entrySet().containsAll(rageMap.entrySet())){
               for (Entry<String, List<String>> rageEntry : rageMap.entrySet()){
                   if (rageEntry.getKey().equals("ORGANIZATION") && !(annotationMap.containsKey(rageEntry.getKey()))){
                       for (int j = 0; j< rageEntry.getValue().size(); j++) {
                           orgFalsePositiveCount++;
                       }
               }
                   if (rageEntry.getKey().equals("PERSON") && !(annotationMap.containsKey(rageEntry.getKey()))){
                      // System.out.println(rageEntry.getKey());
                      // System.out.println(annotationMap.entrySet());
                       for (int j = 0; j< rageEntry.getValue().size(); j++) {
                           perFalsePositiveCount++;
                       }
               }
                   if (rageEntry.getKey().equals("LOCATION") && !(annotationMap.containsKey(rageEntry.getKey()))){
                       for (int j = 0; j< rageEntry.getValue().size(); j++) {
                           locFalsePositiveCount++;
                     }
                 }
              }
           }



               for (Entry<String, List<String>> entry : annotationMap.entrySet()){

                   int i_index = 0;
                   if (rageMap.entrySet().isEmpty()){
                       orgFalseNegativeCount++;
                       continue;
                   }

                  // for (Entry<String, List<String>> rageEntry : rageMap.entrySet()){

                   if (entry.getKey().equals("ORGANIZATION")){
                       for(String val : entry.getValue()) {
                           if (rageMap.get(entry.getKey()) == null){
                               orgFalseNegativeCount++;
                               continue;
                       }
            recusion:      for (int i = i_index; i< rageMap.get(entry.getKey()).size();){
                                String rageVal = rageMap.get(entry.getKey()).get(i);
                               if(val.equals(rageVal)){
                                   orgTruePositiveCount++;
                                   i_index++;
                                   break recusion;
                       }

                           else if((val.length() > rageVal.length()) && val.contains(rageVal)){  //|| dataB.get(entryA.getKey()).contains(entryA.getValue())){
                               orgTruePositiveCount++;
                               i_index++;
                               break recusion;
                       }
                           else if((val.length() < rageVal.length()) && rageVal.contains(val)){
                               orgTruePositiveCount++;
                                i_index++;
                                break recusion;
                           }

                           else if(!val.contains(rageVal)){
                               orgFalseNegativeCount++;
                               i_index++;
                               break recusion;
                           }
                           else if(!rageVal.contains(val)){
                                 orgFalsePositiveCount++;
                                 i_index++;
                                 break recusion;
                             }


                      }
                    }
                   }

                  ......................... //(Same for person and location)


                    compareResults.add(orgTruePositiveCount); 
                    compareResults.add(orgFalseNegativeCount); 
                    compareResults.add(orgFalsePositiveCount);  
                    compareResults.add(perTruePositiveCount); 
                    compareResults.add(perFalseNegativeCount);  
                    compareResults.add(perFalsePositiveCount); 
                    compareResults.add(locTruePositiveCount); 
                    compareResults.add(locFalseNegativeCount);  
                    compareResults.add(locFalsePositiveCount); 

                    System.out.println(compareResults);
                    return compareResults;

            }  
serendipity
  • 852
  • 13
  • 32
  • 1
    You should more formally describe what result you want to receive – Andremoniy Feb 13 '17 at 10:57
  • @Andremoniy Done! I have code that accomplishes most of this but I guess what I am looking for is an answer to questions like 'Do I need to sort the maps before comparing?' or is there something else I should do to prevent this issue? – serendipity Feb 13 '17 at 11:03
  • What is the value type? Is it a set or a list? – vanje Feb 13 '17 at 11:03
  • How are you doing the comparison? Can you post the code? – anacron Feb 13 '17 at 11:05
  • @vanje Edited the question to answer your question. – serendipity Feb 13 '17 at 11:09
  • @anacron Added code. It's really long. – serendipity Feb 13 '17 at 11:09
  • Thanks for posting the code. Map1 has `Fulton Tax Commissioner 's Office` and Map2 has `Fulton Tax Commissioner`. Is this what you're calling as Partial Match? – anacron Feb 13 '17 at 11:15
  • @anacron Yes. I am counting partial matches as True Positive matches – serendipity Feb 13 '17 at 11:16
  • so anything in the first map with a string like 'abcd something' is counted as true positive if in the other map is 'something'? – Zeromus Feb 13 '17 at 11:21
  • @Zeromus Yes. Unfortunately I have to do that because the dataset I am using as my goldset is not perfect. So a partial match is also good enough to be called a true positive match at this point. – serendipity Feb 13 '17 at 11:23
  • @serendipity So in case of partial comparison, how big part of the string should be consistent? Full words, part of the word or at least few words? – Artur Nowicki Feb 13 '17 at 13:32

3 Answers

1

If I understood the question correctly, this may help.

I created a custom String wrapper that overrides equals to allow partial matches:

/**
 * Wrapper around a {@link String} whose {@link #equals(Object)} treats two values as equal when
 * the texts are identical or when either text contains the other (a "partial match").
 *
 * <p><strong>Caveat:</strong> partial matching is not transitive ("ab" matches "a" and "b", but
 * "a" does not match "b"), so this type cannot fully honour the {@code equals} contract. Use it
 * only for linear look-ups such as {@code List.contains}; do not use it as a key or element of
 * hash-based or sorted collections. Note also that an empty string is contained in every string
 * and therefore matches everything.
 */
public class MyCustomString {

    private String myString;

    /**
     * @param myString the text to wrap; may be {@code null}
     */
    public MyCustomString(String myString) {
        this.myString = myString;
    }

    /**
     * @return the wrapped text, possibly {@code null}
     */
    public String getMyString() {
        return myString;
    }

    /**
     * @param myString the new text to wrap; may be {@code null}
     */
    public void setMyString(String myString) {
        this.myString = myString;
    }

    /**
     * Partial-match equality: {@code true} when both texts are {@code null}, or when both are
     * non-null and one contains the other. The check is symmetric, so the outcome does not depend
     * on which of the two objects the method is invoked on.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is a {@code MyCustomString} that partially matches this one
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final MyCustomString other = (MyCustomString) obj;
        if (this.myString == null || other.myString == null) {
            // A null text only matches another null text; never call contains() on/with null.
            return this.myString == null && other.myString == null;
        }
        return this.myString.contains(other.myString) || other.myString.contains(this.myString);
    }

    /**
     * Returns a constant. Objects that are "equal" by partial match wrap different texts, so any
     * hash derived from the text would give equal objects different hash codes and break the
     * {@code hashCode} contract.
     *
     * @return the same value for every instance
     */
    @Override
    public int hashCode() {
        return 3;
    }
}

And here is the code I tried with the first part of your maps:

// Demo: compares the value lists of two maps key by key, using MyCustomString.equals for matching.
// Map 1 is the reference ("gold") data.
LinkedHashMap<String, List<MyCustomString>> sampleMap1 = new LinkedHashMap<String, List<MyCustomString>>();
        sampleMap1.put("a", new ArrayList<>());
        sampleMap1.get("a").add(new MyCustomString("Fulton Tax Commissioner 's Office"));
        sampleMap1.get("a").add(new MyCustomString("Grady Hospital"));
        sampleMap1.get("a").add(new MyCustomString("Fulton Health Department"));

        // Map 2 is the data being evaluated against map 1.
        LinkedHashMap<String, List<MyCustomString>> sampleMap2 = new LinkedHashMap<String, List<MyCustomString>>();
        sampleMap2.put("a", new ArrayList<>());
        sampleMap2.get("a").add(new MyCustomString("Atlanta Police Department"));
        sampleMap2.get("a").add(new MyCustomString("Fulton Tax Commissioner"));
        sampleMap2.get("a").add(new MyCustomString("Fulton Health Department"));

        HashMap<String, Integer> resultMap = new HashMap<String, Integer>();

        for (Map.Entry<String, List<MyCustomString>> entry : sampleMap1.entrySet()) {
            String key1 = entry.getKey();
            List<MyCustomString> value1 = entry.getValue();
            List<MyCustomString> singleListOfMap2 = sampleMap2.get(key1);
            if (singleListOfMap2 == null) {
                // The key is missing from map 2: every map-1 entry is a false negative.
                System.out.println("Number of false N" + value1.size());
                // Nothing to iterate for this key; without this the loop below would throw a
                // NullPointerException on the missing list.
                continue;
            }
            for (MyCustomString singleStringOfMap2 : singleListOfMap2) {
                // List.contains invokes singleStringOfMap2.equals(element) for each map-1 element.
                if (value1.contains(singleStringOfMap2)) {
                    //True positive
                    System.out.println("true");
                } else {
                    // NOTE(review): a map-2 entry with no counterpart in map 1 is conventionally a
                    // false positive, although the message printed here says "false N" - confirm.
                    System.out.println("false N");
                }
            }
            int size = singleListOfMap2.size();
            System.out.println(size + " - numero di true");
            //false positive = size - true
        }
        for (String string : sampleMap2.keySet()) {
            if (sampleMap1.get(string) == null) {
                // The key exists only in map 2: all of its entries are false positives.
                System.out.println("numero di false P: " + sampleMap2.get(string).size());
            }
        }
Zeromus
  • 4,472
  • 8
  • 32
  • 40
  • ah noticed later the map could miss an entire key like PERSON in your first case – Zeromus Feb 13 '17 at 12:07
  • Thanks so much for the code! I am trying to see how I can incorporate it in my code and test it. TO answer you question about missing the PERSON key..the first part of my code takes care of that. Any extra stuff in map2 that is not in map1 is a false positive. – serendipity Feb 13 '17 at 12:10
  • well i fixed it too ;) – Zeromus Feb 13 '17 at 12:13
  • So with this code my maps don't necessarily have to be in order for comparison? – serendipity Feb 13 '17 at 12:17
  • no they dont have to, but i didn't test it throughtfully though, so you might want to do some tries – Zeromus Feb 13 '17 at 12:20
1

I wrote this class to compare the maps:

/**
 * Compares a "gold" map of expected values against a map of produced values, key by key, and
 * tallies true positives, false positives and false negatives per key.
 *
 * <p>Every gold value starts as a false negative. Each compared value is tested against the gold
 * values of the same key using the supplied matcher; the first gold value that matches is marked
 * as found (true positive). A compared value that matches no gold value is recorded as a false
 * positive. All counting happens in the constructor.
 *
 * @param <K> key type of both maps
 * @param <V> element type of the value collections
 */
public class MapComparison<K, V> {
    /** Per-key counters: one per gold value, plus one per false positive from the compared map. */
    private final Map<K, Collection<ValueCounter>> temp;
    private final Map<K, Collection<V>> goldMap;
    private final Map<K, Collection<V>> comparedMap;
    private final BiPredicate<V, V> valueMatcher;

    /**
     * Builds the comparison and computes all counters.
     *
     * @param mapA         the gold (reference) map
     * @param mapB         the map to evaluate against the gold map
     * @param valueMatcher called as {@code test(goldValue, comparedValue)}; returns {@code true}
     *                     when the two values should count as a match
     */
    public MapComparison(Map<K, Collection<V>> mapA, Map<K, Collection<V>> mapB, BiPredicate<V, V> valueMatcher) {
        this.goldMap = mapA;
        this.comparedMap = mapB;
        this.valueMatcher = valueMatcher;

        this.temp = new HashMap<>();

        goldMap.forEach((key, valueList) -> {
            temp.put(key, valueList.stream().map(value -> new ValueCounter(value, true)).collect(Collectors.toList()));
        });

        comparedMap.forEach((key, valueList) -> {
            Collection<ValueCounter> goldCounters = temp.get(key);

            if (goldCounters == null) {
                // Key is absent from the gold map: every compared value is a false positive.
                temp.put(key, valueList.stream().map(value -> new ValueCounter(value, false)).collect(Collectors.toList()));
                return;
            }

            // Collect ALL unmatched values before appending any of them. Appending while still
            // matching would let a later compared value "match" an earlier false positive and be
            // miscounted as a true positive (e.g. the same wrong value appearing twice).
            // noneMatch short-circuits, so each compared value marks at most one gold value.
            Collection<ValueCounter> falsePositives = valueList.stream()
                    .filter(v -> goldCounters.stream().noneMatch(mv -> mv.match(v)))
                    .map(v -> new ValueCounter(v, false))
                    .collect(Collectors.toList());

            goldCounters.addAll(falsePositives);
        });
    }

    /**
     * Sums the counters of each key and renders them as text.
     *
     * @return for every key, the key followed by ":" and one line each for the true-positive,
     *         false-positive and false-negative totals, then a blank line
     */
    public String formatMatchedCounters() {
        StringBuilder sb = new StringBuilder();

        for (Entry<K, Collection<ValueCounter>> e : temp.entrySet()) {
            sb.append(e.getKey()).append(":");

            int truePositives = 0;
            int falsePositives = 0;
            int falseNegatives = 0;
            for (ValueCounter counter : e.getValue()) {
                truePositives += counter.truePositiveCount;
                falsePositives += counter.falsePositiveCount;
                falseNegatives += counter.falseNegativeCount;
            }
            sb.append(String.format("\ntruePositiveCount=%s\nfalsePositiveCount=%s\nfalseNegativeCount=%s\n\n", truePositives, falsePositives, falseNegatives));
        }
        return sb.toString();
    }


    /** Mutable tally attached to a single value (a gold value or an unmatched compared value). */
    private class ValueCounter {
        private final V goldValue;

        private int truePositiveCount = 0;
        private int falsePositiveCount = 0;
        private int falseNegativeCount = 0;

        /**
         * @param value       the value this counter belongs to
         * @param isInGoldMap {@code true} for a gold value (starts as one false negative),
         *                    {@code false} for an unmatched compared value (one false positive)
         */
        ValueCounter(V value, boolean isInGoldMap) {
            this.goldValue = value;

            if(isInGoldMap) {
                falseNegativeCount = 1;
            } else {
                falsePositiveCount = 1;
            }
        }

        /**
         * Tests {@code otherValue} against this counter's value and, on a match, records a true
         * positive and clears the pending false negative.
         *
         * @param otherValue a value from the compared map
         * @return whether the matcher accepted the pair
         */
        boolean match(V otherValue) {
            boolean result = valueMatcher.test(goldValue, otherValue);

            if(result) {
                truePositiveCount++;

                falseNegativeCount = 0;
            }
            return result;
        }
    }
}

What it does is basically create a union of the map items, where each item has its own mutable counter to track matching values. The method formatMatchedCounters() just iterates over these counters and sums them for each key.

The following test:

/**
 * Smoke test for {@code MapComparison}: builds a gold map and a compared map, then prints the
 * formatted counters. It makes no assertions; the report is inspected manually.
 */
public class MapComparisonTest {

    private Map<String, Collection<String>> expectedEntities;
    private Map<String, Collection<String>> actualEntities;
    private BiPredicate<String, String> matcher;

    /** Recreates the fixture maps and the case-insensitive matcher before each test. */
    @Before
    public void initMaps() {
        this.matcher = String::equalsIgnoreCase;

        this.expectedEntities = new HashMap<>();
        this.expectedEntities.put("ORGANIZATION", Arrays.asList("Fulton Tax Commissioner", "Grady Hospital", "Fulton Health Department"));
        this.expectedEntities.put("LOCATION", Arrays.asList("Bellwood", "Alpharetta"));

        this.actualEntities = new HashMap<>();
        this.actualEntities.put("ORGANIZATION", Arrays.asList("Atlanta Police Department", "Fulton Tax Commissioner", "Fulton Health Department"));
        this.actualEntities.put("LOCATION", Arrays.asList("Alpharetta"));
        this.actualEntities.put("PERSON", Arrays.asList("Bellwood", "Grady Hospital"));
    }

    /** Runs the comparison on the fixture and prints the per-key counters. */
    @Test
    public void test() {
        String report = new MapComparison<>(expectedEntities, actualEntities, matcher).formatMatchedCounters();

        System.out.println(report);
    }
}

has the result of:

ORGANIZATION:
truePositiveCount=2
falsePositiveCount=1
falseNegativeCount=1

LOCATION:
truePositiveCount=1
falsePositiveCount=0
falseNegativeCount=1

PERSON:
truePositiveCount=0
falsePositiveCount=2
falseNegativeCount=0

Note, that I don't know how you want to compare similar values (e.g. "Fulton Tax Commissioner" vs "Fulton Tax Commissioner s"), so I decided to put that decision in the signature (in this case a BiPredicate as parameter).

For example, the String comparison could be implemented using the Levenshtein distance:

valueMatcher = (s1, s2) -> StringUtils.getLevenshteinDistance(s1, s2) < 5;
SME_Dev
  • 1,880
  • 13
  • 23
1

I came up with a simplified version. This is the output that I get:

Organization:
    False Positive: Atlanta Police Department
    True Positive: Fulton Tax Commissioner
    True Positive: Fulton Health Department
    False Negative: Grady Hospital

Person:
    False Positive: Bellwood
    False Positive: Grady Hospital

Location:
    True Positive: Alpharetta
    False Negative: Bellwood

[2, 1, 1, 0, 0, 2, 1, 1, 0]

Here is the code that I created:


/**
 * Compares named-entity annotations (the gold set) with extracted entities, per entity type, and
 * counts true positives, false negatives and false positives. Exact and partial (substring)
 * matches both count as matches.
 */
public class MapCompare {

    /**
     * Tells whether {@code value} matches any element of {@code annotationList}, either exactly or
     * partially (one string contains the other).
     *
     * @param annotationList the strings to search
     * @param value          the string to look for
     * @return {@code true} on an exact or partial match
     */
    public static boolean listContains(List<String> annotationList, String value) {
        if (annotationList.contains(value)) {
            // 100% Match
            return true;
        }
        for (String s : annotationList) {
            if (s.contains(value) || value.contains(s)) {
                // Partial Match
                return true;
            }
        }
        return false;
    }

    /**
     * Compares one entity type and prints each classified entry.
     *
     * <p>A {@code null} list is treated as empty, so a type that is missing from one map still
     * contributes its entries: extracted entries without any annotations are false positives, and
     * annotations without any extracted entries are false negatives.
     *
     * @param annotationList gold-set entries for the type; may be {@code null}
     * @param rageList       extracted entries for the type; may be {@code null}
     * @return a three-element list: true positives, false negatives, false positives (in that order)
     */
    public static List<Integer> compareLists(List<String> annotationList, List<String> rageList) {
        List<String> goldEntries = annotationList != null ? annotationList : new ArrayList<String>();
        List<String> foundEntries = rageList != null ? rageList : new ArrayList<String>();

        int truePositiveCount = 0;
        int falseNegativeCount = 0;
        int falsePositiveCount = 0;

        // Every extracted entry is either confirmed by the gold set or spurious.
        for (String r : foundEntries) {
            if (listContains(goldEntries, r)) {
                System.out.println("\tTrue Positive: " + r);
                truePositiveCount++;
            } else {
                System.out.println("\tFalse Positive: " + r);
                falsePositiveCount++;
            }
        }

        // Every gold entry that nothing extracted matches was missed.
        for (String s : goldEntries) {
            if (!listContains(foundEntries, s)) {
                System.out.println("\tFalse Negative: " + s);
                falseNegativeCount++;
            }
        }

        List<Integer> compareResults = new ArrayList<Integer>();
        compareResults.add(truePositiveCount);
        compareResults.add(falseNegativeCount);
        compareResults.add(falsePositiveCount);

        System.out.println();

        return compareResults;
    }

    /**
     * Compares the ORGANIZATION, PERSON and LOCATION entries of both maps and prints the result.
     *
     * @param annotationMap the gold set, keyed by entity type
     * @param rageMap       the extracted entities, keyed by entity type
     * @return nine counters: (true positive, false negative, false positive) for ORGANIZATION,
     *         then PERSON, then LOCATION
     */
    private static List<Integer> compareMaps(LinkedHashMap<String, List<String>> annotationMap, LinkedHashMap<String, List<String>> rageMap) {
        List<Integer> compareResults = new ArrayList<Integer>();
        System.out.println("Organization:");
        compareResults.addAll(compareLists(annotationMap.get("ORGANIZATION"), rageMap.get("ORGANIZATION")));
        System.out.println("Person:");
        compareResults.addAll(compareLists(annotationMap.get("PERSON"), rageMap.get("PERSON")));
        System.out.println("Location:");
        compareResults.addAll(compareLists(annotationMap.get("LOCATION"), rageMap.get("LOCATION")));
        System.out.println(compareResults);
        return compareResults;
    }

    /** Runs the comparison on the sample data from the question. */
    public static void main(String[] args) {
        // The gold set has no PERSON key, as in the question; compareLists copes with the
        // missing list, so no empty placeholder entry is required.
        LinkedHashMap<String, List<String>> map1 = new LinkedHashMap<>();
        map1.put("ORGANIZATION", Arrays.asList("Fulton Tax Commissioner's Office", "Grady Hospital", "Fulton Health Department"));
        map1.put("LOCATION", Arrays.asList("Bellwood", "Alpharetta"));

        LinkedHashMap<String, List<String>> map2 = new LinkedHashMap<>();
        map2.put("ORGANIZATION", Arrays.asList("Atlanta Police Department", "Fulton Tax Commissioner", "Fulton Health Department"));
        map2.put("LOCATION", Arrays.asList("Alpharetta"));
        map2.put("PERSON", Arrays.asList("Bellwood", "Grady Hospital"));

        compareMaps(map1, map2);

    }

}

Hope this helps!

anacron
  • 6,443
  • 2
  • 26
  • 31