1

Can anyone help me with the correct use of GATE NLP Python's 'RemoveAnn' action (gatenlp.pam.pampac.actions.RemoveAnn - docs)?

Whilst adding annotations with PAMPAC is ok, I can't seem to then remove the annotation.

For example, here I add an annotation to the word 'cat', then try to remove the annotation:

from gatenlp import Document
from gatenlp.processing.tokenizer import NLTKTokenizer
from gatenlp.pam.pampac import *
from nltk.tokenize.regexp import WhitespaceTokenizer


def pprint(doc):
    """Print every annotation of the document's default set with its text.

    Each output line holds the text returned by ``doc[annotation]``,
    left-justified to at least six characters, followed by ``"- "`` and the
    string form of the annotation. The listing is framed by two dashed
    separator lines.

    Args:
        doc: an object offering ``annset()`` (iterable of annotations) and
            indexing by annotation that yields a string.
    """
    separator = "------"
    print(separator)
    for ann in doc.annset():
        covered = doc[ann]
        print(f"{covered.ljust(6)}- {ann!s}")
    print(separator)

# Build a document from the sample text and tokenize it on whitespace
text = """dog cat fish"""
tok1 = NLTKTokenizer(nltk_tokenizer=WhitespaceTokenizer())
doc1 = Document(text)
doc1 = tok1(doc1)
print("Simple document:")
pprint(doc1)

# Step 1: match the literal text "cat" and add a 'CAT_TAG' annotation for it
pat1 = Text(text="cat")
action1 = AddAnn(type="CAT_TAG")
rule1 = Rule(pat1, action1)
pampac1 = Pampac(rule1, skip="longest", select="first")
annt1 = PampacAnnotator(pampac1, annspec=[("", "Token")], outset_name="")
annt1(doc1)
print("Annotate cat text with Annotation of type 'CAT_TAG'")
pprint(doc1)

# Step 2: match the 'CAT_TAG' annotation under the name "remove" and try to remove it
pat3 = AnnAt(type="CAT_TAG", name="remove")
# action3 = AddAnn(type="CAT_TAGGED_AGAIN", name="remove")  # works
action3 = RemoveAnn("remove", annset=doc1.annset())  # <-- is this the right way to specify the annset???
rule3 = Rule(pat3, action3)
pampac2 = Pampac(rule3, skip="longest", select="first")
annt2 = PampacAnnotator(pampac2, annspec=[""], outset_name="")
annt2(doc1)  # this call raises the KeyError shown in the traceback below
print("Try to remove the 'CAT_TAG' annotation from the default set")
pprint(doc1)

Output:

Simple document:
------
dog   - Annotation(0,3,Token,features=Features({}),id=0)
cat   - Annotation(4,7,Token,features=Features({}),id=1)
fish  - Annotation(8,12,Token,features=Features({}),id=2)
------
Annotate cat text with Annotation of type 'CAT_TAG'
------
dog   - Annotation(0,3,Token,features=Features({}),id=0)
cat   - Annotation(4,7,Token,features=Features({}),id=1)
cat   - Annotation(4,7,CAT_TAG,features=Features({}),id=3)
fish  - Annotation(8,12,Token,features=Features({}),id=2)
------

# Error:
#
# Traceback (most recent call last):
#   File ".../scratch.py", line 37, in <module>
#     annt2(doc1)
#   File ".../gatenlp/pam/pampac/pampac.py", line 234, in __call__
#     self.pampac.run(doc, anns, outset=outset, containing_anns=cont)
#   File ".../gatenlp/pam/pampac/pampac.py", line 106, in run
#     return self._run4span(logger, ctx, location)
#            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#   File ".../gatenlp/pam/pampac/pampac.py", line 133, in _run4span
#     fret = self.rules[idx].action(ret, context=ctx, location=location)
#            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#   File ".../gatenlp/pam/pampac/actions.py", line 123, in __call__
#     return self.actions[0](succ, context=context, location=location)
#            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#   File ".../gatenlp/pam/pampac/actions.py", line 421, in __call__
#     self.annset.remove(theann)
#   File ".../gatenlp/annotation_set.py", line 601, in remove
#     self._annset.remove(ann)
# KeyError: Annotation(4,7,CAT_TAG,features=Features({}),id=3)

0 Answers