Sha256: e0e056c525a45fee6db355d234d71974982af0a521f7ec7ece426f3403b67572
Contents?: true
Size: 1.52 KB
Versions: 45
Compression:
Stored size: 1.52 KB
Contents
package lingscope.algorithms;

import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Part of speech tagger. Wraps a {@link MaxentTagger} and reduces its tagged
 * output to the sequence of tags only.
 *
 * @author shashank
 */
public class PosTagger {

    /** Shared logger; looked up once instead of at every failure site. */
    private static final Logger LOGGER = Logger.getLogger(PosTagger.class.getName());

    /** Underlying tagger; stays {@code null} if the model could not be loaded. */
    private MaxentTagger posTagger;

    /**
     * Creates an instance of POS tagger by loading the given grammar file.
     * A load failure is logged at SEVERE level and not rethrown; in that case
     * {@link #replaceWordsWithPos(String, boolean)} returns an empty list.
     *
     * @param grammarFile location of the tagger model handed to {@link MaxentTagger}
     */
    public PosTagger(String grammarFile) {
        try {
            posTagger = new MaxentTagger(grammarFile);
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "Could not load POS tagger model: " + grammarFile, ex);
        }
    }

    /**
     * Takes a sentence as input and returns list of POS tags associated with
     * each word in the sentence.
     *
     * @param sentence    the sentence to tag; {@code null} yields an empty list
     * @param isTokenized {@code true} if the sentence is already tokenized;
     *                    otherwise it is first passed through
     *                    {@code AbnerTokenizer.splitTermsByPunctuation}
     * @return one tag per token of the tagger output, in order; an empty list
     *         if the tagger is unavailable, tagging fails, or there is no output
     */
    public List<String> replaceWordsWithPos(String sentence, boolean isTokenized) {
        List<String> ret = new ArrayList<String>();
        if (sentence == null) {
            return ret;
        }
        if (posTagger == null) {
            // The constructor already logged the load failure; avoid relying on a caught NPE.
            LOGGER.log(Level.SEVERE, "POS tagger model is not loaded; returning no tags");
            return ret;
        }
        if (!isTokenized) {
            sentence = AbnerTokenizer.splitTermsByPunctuation(sentence);
        }

        String tagged;
        try {
            tagged = posTagger.tagString(sentence);
        } catch (Exception ex) {
            LOGGER.log(Level.SEVERE, "POS tagging failed", ex);
            return ret;
        }
        if (tagged == null) {
            return ret;
        }

        // The output is consumed as space-separated "word/TAG" tokens. Trimming and
        // skipping empty pieces keeps blank output from producing an empty-string tag.
        for (String wordTag : tagged.trim().split(" +")) {
            if (wordTag.length() == 0) {
                continue;
            }
            // The tag is whatever follows the last '/', or the whole token if there is
            // no '/'. Unlike split("/"), this cannot fail on a token made only of slashes.
            ret.add(wordTag.substring(wordTag.lastIndexOf('/') + 1));
        }
        return ret;
    }
}
Version data entries
45 entries across 45 versions & 1 rubygems