2

I am trying to build a part-of-speech (POS) tagger in a UIMA pipeline. I have downloaded the Stanford POSTagger jar, attached it to the project, and copied the English models, but it throws an exception.

My Code:

package com.gauge.ie.uimaproject;

import java.io.IOException;

import org.apache.uima.UIMAException;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.jcas.JCas;

import edu.stanford.nlp.tagger.maxent.MaxentTagger;

/**
 * UIMA annotator that runs the Stanford maximum-entropy part-of-speech tagger
 * over the document text of the incoming CAS and stores the tagged string as
 * the document text of a newly created view named by {@link #SOFA_NAME}.
 */
public class POSTagger extends JCasAnnotator_ImplBase
{
    /** Name of the view that {@link #process(JCas)} creates to hold the tagged text. */
    public static String SOFA_NAME="";

    // The model is loaded once per annotator instance, as part of construction.
    // A failure here therefore propagates out of the constructor, and UIMA reports
    // it as "Could not instantiate Annotator class"; the real cause is found
    // further down the exception chain.
    MaxentTagger tagger = new MaxentTagger("tagger/bidirectional-distsim-wsj-0-18.tagger");

    /**
     * Tags the document text of {@code jcas} and writes the tagged string into
     * a new view named {@link #SOFA_NAME}.
     *
     * @param jcas the CAS whose document text is tagged
     * @throws AnalysisEngineProcessException if the target view cannot be
     *         created; the underlying {@link CASException} is kept as the cause
     */
    @Override
    public void process(JCas jcas)throws AnalysisEngineProcessException
    {
        try
        {
            JCas newJCas=jcas.createView(SOFA_NAME);

            System.out.println("getting doc text.......");

            String docText = jcas.getDocumentText();
            String tagged=tagger.tagString(docText);
            System.out.println(tagged);
            newJCas.setDocumentText(tagged);
        }
        catch(CASException cae)
        {
            // Report the failure to the framework instead of printing and
            // continuing as if the document had been processed successfully.
            throw new AnalysisEngineProcessException(cae);
        }
    }
}

exception:

Reading POS tagger model from tagger/bidirectional-distsim-wsj-0-18.tagger ... org.apache.uima.resource.ResourceInitializationException: Could not instantiate Annotator class "com.gauge.ie.uimaproject.POSTagger". Check that your annotator class is not abstract and has a zero-argument constructor.  (Descriptor: <unknown>)
    at org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl.initializeAnalysisComponent(PrimitiveAnalysisEngine_impl.java:250)
    at org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl.initialize(PrimitiveAnalysisEngine_impl.java:170)
    at org.apache.uima.impl.AnalysisEngineFactory_impl.produceResource(AnalysisEngineFactory_impl.java:94)
    at org.apache.uima.impl.CompositeResourceFactory_impl.produceResource(CompositeResourceFactory_impl.java:62)
    at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:279)
    at org.apache.uima.UIMAFramework.produceAnalysisEngine(UIMAFramework.java:407)
    at org.apache.uima.analysis_engine.asb.impl.ASB_impl.setup(ASB_impl.java:256)
    at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initASB(AggregateAnalysisEngine_impl.java:430)
    at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initializeAggregateAnalysisEngine(AggregateAnalysisEngine_impl.java:374)
    at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initialize(AggregateAnalysisEngine_impl.java:187)
    at org.apache.uima.impl.AnalysisEngineFactory_impl.produceResource(AnalysisEngineFactory_impl.java:94)
    at org.apache.uima.impl.CompositeResourceFactory_impl.produceResource(CompositeResourceFactory_impl.java:62)
    at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:279)
    at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:331)
    at org.apache.uima.UIMAFramework.produceAnalysisEngine(UIMAFramework.java:448)
    at org.apache.uima.fit.pipeline.SimplePipeline.runPipeline(SimplePipeline.java:140)
    at com.gauge.ie.uimaproject.pipeline.main(pipeline.java:27)
Caused by: edu.stanford.nlp.io.RuntimeIOException: Unrecoverable error while loading a tagger model
    at edu.stanford.nlp.tagger.maxent.MaxentTagger.readModelAndInit(MaxentTagger.java:869)
    at edu.stanford.nlp.tagger.maxent.MaxentTagger.readModelAndInit(MaxentTagger.java:767)
    at edu.stanford.nlp.tagger.maxent.MaxentTagger.<init>(MaxentTagger.java:298)
    at edu.stanford.nlp.tagger.maxent.MaxentTagger.<init>(MaxentTagger.java:263)
    at com.gauge.ie.uimaproject.POSTagger.<init>(POSTagger.java:20)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
    at java.lang.Class.newInstance(Class.java:442)
    at org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl.initializeAnalysisComponent(PrimitiveAnalysisEngine_impl.java:240)
    ... 16 more
Caused by: java.io.InvalidClassException: edu.stanford.nlp.tagger.maxent.ExtractorDistsim; local class incompatible: stream classdesc serialVersionUID = 1, local class serialVersionUID = 2
    at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:621)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1623)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
    at java.util.HashMap.readObject(HashMap.java:1396)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:497)
    at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1017)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1896)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
    at edu.stanford.nlp.tagger.maxent.MaxentTagger.readExtractors(MaxentTagger.java:595)
    at edu.stanford.nlp.tagger.maxent.MaxentTagger.readModelAndInit(MaxentTagger.java:820)
    ... 26 more
org.apache.uima.resource.ResourceInitializationException: Could not instantiate Annotator class "com.gauge.ie.uimaproject.POSTagger". Check that your annotator class is not abstract and has a zero-argument constructor.  (Descriptor: <unknown>)
Narendra Rawat
  • 353
  • 2
  • 5
  • 17

2 Answers2

2

Before coding your own integration, I suggest you have a look at DKPro and their integration of the Stanford PoS tagger. This will probably save you some time:

https://code.google.com/p/dkpro-core-asl/wiki/ComponentList_1_6_2#POS_Tagging

http://dkpro-core-gpl.googlecode.com/svn/de.tudarmstadt.ukp.dkpro.core-gpl/tags/de.tudarmstadt.ukp.dkpro.core-gpl-1.6.2/apidocs/index.html?de/tudarmstadt/ukp/dkpro/core/stanfordnlp/StanfordPosTagger.html

If you really want to code it yourself, you can have a look at their source:

https://github.com/dkpro/dkpro-core/blob/master/de.tudarmstadt.ukp.dkpro.core.stanfordnlp-gpl/src/main/java/de/tudarmstadt/ukp/dkpro/core/stanfordnlp/StanfordPosTagger.java

As far as I can tell, they instantiate the tagger with a different constructor:

// Excerpt only: aUrl is not defined here and comes from the surrounding DKPro code.
// Uses the three-argument MaxentTagger constructor: the model location, a
// Properties object built from the "-model" argument, and a boolean flag
// (presumably controls load-time logging; confirm against the MaxentTagger Javadoc).
String modelFile = aUrl.toString();
            MaxentTagger tagger = new MaxentTagger(modelFile,
                    StringUtils.argsToProperties(new String[] { "-model", modelFile }), false);
jvdbogae
  • 1,241
  • 9
  • 15
1

You are trying to load a POS tagger model that is not compatible with the version of the POS tagger that you are using.

Caused by: java.io.InvalidClassException: edu.stanford.nlp.tagger.maxent.ExtractorDistsim; 
  local class incompatible: 
    stream classdesc serialVersionUID = 1, 
    local class serialVersionUID = 2

For that reason, the POS tagger cannot deserialize the model. Make sure you are using a compatible model.

rec
  • 10,340
  • 3
  • 29
  • 43
  • But this very model works when I use it without the UIMA pipeline. – Narendra Rawat Jul 28 '15 at 09:16
  • Double-check your classpath. You likely have multiple JARs containing the same classes on the classpath, and by chance, when running under UIMA, the incompatible ones take precedence over the compatible ones. In any case, I'd strongly recommend using CoreNLP instead of the standalone POSTagger. CoreNLP includes most of the Stanford tools, including the POSTagger. – rec Jul 28 '15 at 09:20