I use below code. However, the outcome is not what I expected. The outcome is [machine, Learning]
But I want to get [machine, learn]
. How can I do this? Also, when my input is "biggest bigger"
, I wanna get the result like [big, big]
, but the outcome is just [biggest bigger]
(PS: I just add these four jars in my eclipse:joda-time.jar, stanford-corenlp-3.3.1-models.jar, stanford-corenlp-3.3.1.jar, xom.jar
Do I need add some more?)
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
public class StanfordLemmatizer {
protected StanfordCoreNLP pipeline;
public StanfordLemmatizer() {
// Create StanfordCoreNLP object properties, with POS tagging
// (required for lemmatization), and lemmatization
Properties props;
props = new Properties();
props.put("annotators", "tokenize, ssplit, pos, lemma");
this.pipeline = new StanfordCoreNLP(props);
}
public List<String> lemmatize(String documentText)
{
List<String> lemmas = new LinkedList<String>();
// Create an empty Annotation just with the given text
Annotation document = new Annotation(documentText);
// run all Annotators on this text
this.pipeline.annotate(document);
// Iterate over all of the sentences found
List<CoreMap> sentences = document.get(SentencesAnnotation.class);
for(CoreMap sentence: sentences) {
// Iterate over all tokens in a sentence
for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
// Retrieve and add the lemma for each word into the
// list of lemmas
lemmas.add(token.get(LemmaAnnotation.class));
}
}
return lemmas;
}
// Test
public static void main(String[] args) {
System.out.println("Starting Stanford Lemmatizer");
String text = "Machine Learning\n";
StanfordLemmatizer slem = new StanfordLemmatizer();
System.out.println(slem.lemmatize(text));
}
}