Sha256: c599709209e490fe9d714b31cad740b3274000afc21b12ec803236158338c69d
Contents?: true
Size: 857 Bytes
Versions: 1
Compression:
Stored size: 857 Bytes
Contents
# encoding: UTF-8

# Core extension of String adding text-processing helpers that delegate to
# TextNlp collaborators (normalizer, tokenizer) or to a caller-supplied
# translator.
class String
  # Flag set to true by #normalize on the string it returns, so that calling
  # #normalize again on that result is a no-op.
  attr_accessor :normalized

  class << self
    # Optional shared normalizer used by #normalize. When nil, a new
    # TextNlp::Normalizer is instantiated on every call.
    attr_accessor :normalizer

    # Optional shared tokenizer used by #tokenize. When nil, a new
    # TextNlp::Tokenizer is instantiated on every call.
    attr_accessor :tokenizer
  end

  # Returns the normalized form of this string.
  #
  # If this string is already flagged as normalized, it is returned as-is.
  # Otherwise the configured normalizer (String.normalizer, or a new
  # TextNlp::Normalizer) is asked to normalize it and the result is flagged.
  #
  # @return [String] a string whose #normalized flag is true
  def normalize
    return self if normalized

    new_string = (String.normalizer || TextNlp::Normalizer.new).normalize(self)
    # Instance variables cannot be set on a frozen object; work on a copy so
    # that a normalizer returning a frozen string does not make us raise.
    new_string = new_string.dup if new_string.frozen?
    new_string.normalized = true
    new_string
  end

  # Splits this string into tokens using String.tokenizer, or a new
  # TextNlp::Tokenizer when none is configured.
  #
  # @return whatever the tokenizer's #tokenize returns (used as an Array by
  #   #similarity)
  def tokenize
    (String.tokenizer || TextNlp::Tokenizer.new).tokenize(self)
  end

  # Computes a symmetric token-overlap score between this string and +text+.
  #
  # Both strings are normalized and tokenized, then the score is the mean of
  # (shared tokens / own tokens) for each side, computed over *distinct*
  # tokens. Array#& already returns distinct elements, so the denominators
  # must be de-duplicated as well; otherwise identical texts containing a
  # repeated token would score below 1.0.
  #
  # @param text [String] the string to compare against
  # @return [Float] a score between 0.0 and 1.0; 0.0 when either side yields
  #   no tokens
  def similarity(text)
    own_tokens   = normalize.tokenize.uniq
    other_tokens = text.normalize.tokenize.uniq
    return 0.0 if own_tokens.empty? || other_tokens.empty?

    shared = (own_tokens & other_tokens).size.to_f
    ((shared / own_tokens.size) + (shared / other_tokens.size)) / 2
  end

  # Translates this string by delegating to the given translator.
  #
  # @param translator [#translate] object responding to translate(string)
  # @return the value returned by translator.translate(self)
  def translate(translator)
    translator.translate(self)
  end
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
text_nlp-0.0.2 | lib/text_nlp/string.rb |