Sha256: 7942fe4341829d2b8387a239ee736e7cc04427bcad7c40fd745111547f272c5d
Contents?: true
Size: 1.54 KB
Versions: 1
Compression:
Stored size: 1.54 KB
Contents
require 'rbbt-util' require 'rbbt/util/misc' require 'rbbt/util/simpleDSL' class CueIndex include SimpleDSL class LexiconMissingError < StandardError; end def define(name, *args, &block) @rules << [name,block] nil end def initialize(file = nil, &block) @rules = [] file ||= Rbbt.share.rnorm.cue_default.produce if !file && !block load_config(:define, file, &block) end def config @config[:define] end def cues(word) @rules.collect{|rule| c = rule[1].call(word) c = [c] unless c.is_a? Array c } end def clean(max) @indexes.each{|index| remove = [] index.each{|key,values| remove << key if values.length > max } remove.each{|key| index.delete(key) } } end def load(file, max_candidates = 50) @indexes = Array.new(@rules.size){Hash.new} data = TSV.new(file, :flat) data.each{|code, values| values.each{|value| cues(value).each_with_index{|cue_list,i| cue_list.each{|cue| @indexes[i][cue] ||= [] @indexes[i][cue] << code unless @indexes[i][cue].include? code } } } } clean(max_candidates) if max_candidates nil end def match(name) raise LexiconMissingError, "Load Lexicon before matching" unless @indexes cues = cues(name) @indexes.each_with_index{|index,i| best = [] cues[i].each{|cue| best << index[cue] if index[cue] } return best.flatten if best.any? } return [] end end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
rbbt-text-0.5.0 | lib/rbbt/ner/rnorm/cue_index.rb |