Sha256: c599709209e490fe9d714b31cad740b3274000afc21b12ec803236158338c69d

Contents?: true

Size: 857 Bytes

Versions: 1

Compression:

Stored size: 857 Bytes

Contents

# encoding: UTF-8

# Reopens the core String class to add text-processing helpers. The actual
# normalization, tokenization and translation work is delegated to
# collaborator objects; this class only wires them together.
class String
  # Truthy once this string has been returned by #normalize. Used so that
  # normalizing an already-normalized string is a no-op.
  attr_accessor :normalized

  class << self
    # Optional shared collaborators used by #normalize and #tokenize. When
    # left unset, a new TextNlp::Normalizer / TextNlp::Tokenizer is
    # instantiated on every call.
    attr_accessor :normalizer
    attr_accessor :tokenizer
  end

  # Returns the normalized form of this string.
  #
  # If the receiver is already flagged as normalized it is returned as-is.
  # Otherwise the string is passed to String.normalizer (or a new
  # TextNlp::Normalizer) and the result is flagged as normalized.
  #
  # @return [String] the receiver, or the flagged normalizer output
  def normalize
    return self if normalized

    result = (String.normalizer || TextNlp::Normalizer.new).normalize(self)
    # A frozen result cannot have the flag set on it (FrozenError), so flag
    # an unfrozen copy instead.
    result = result.dup if result.frozen?
    result.normalized = true
    result
  end

  # Splits this string into tokens using String.tokenizer (or a new
  # TextNlp::Tokenizer).
  #
  # @return whatever the tokenizer's #tokenize returns; #similarity treats
  #   it as an Array-like collection of tokens
  def tokenize
    (String.tokenizer || TextNlp::Tokenizer.new).tokenize(self)
  end

  # Scores the token overlap between this string and +text+.
  #
  # Both strings are normalized and tokenized, then the score is the mean of
  # two ratios: shared tokens over this string's distinct tokens, and shared
  # tokens over +text+'s distinct tokens.
  #
  # @param text [String] the string to compare against
  # @return [Float] 0.0 when either side has no tokens or nothing is shared,
  #   1.0 when both sides have the same set of tokens
  def similarity(text)
    # Array#& yields distinct elements, so the denominators must be counted
    # over distinct tokens too; otherwise repeated words lower the score and
    # a string would not be fully similar to itself.
    own_tokens = normalize.tokenize.uniq
    other_tokens = text.normalize.tokenize.uniq
    return 0.0 if own_tokens.empty? || other_tokens.empty?

    shared = (own_tokens & other_tokens).size
    (shared.fdiv(own_tokens.size) + shared.fdiv(other_tokens.size)) / 2
  end

  # Translates this string by handing it to +translator+.
  #
  # @param translator [#translate] any object responding to translate(string)
  # @return the value returned by translator.translate
  def translate(translator)
    translator.translate(self)
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
text_nlp-0.0.2 lib/text_nlp/string.rb