Sha256: c2a3accb0ecbf35c891e8acac5df6dd4f673843564bfc6dbbf859345fbb14370

Contents?: true

Size: 1.2 KB

Versions: 1

Compression:

Stored size: 1.2 KB

Contents

module FastText
  class Vectorizer < Model
    DEFAULT_OPTIONS = {
      lr: 0.5,
      lr_update_rate: 100,
      dim: 100,
      ws: 5,
      epoch: 5,
      min_count: 1,
      min_count_label: 0,
      neg: 5,
      word_ngrams: 1,
      loss: "ns",
      model: "skipgram",
      bucket: 2000000,
      minn: 3,
      maxn: 6,
      thread: 3,
      t: 0.0001,
      verbose: 2,
      pretrained_vectors: "",
      save_output: false,
      # seed: 0
    }

    def fit(x)
      input = input_path(x)
      @m ||= Ext::Model.new
      m.train(DEFAULT_OPTIONS.merge(@options).merge(input: input))
    end

    def nearest_neighbors(word, k: 10)
      m.nearest_neighbors(word, k).map(&:reverse).to_h
    end

    def analogies(word_a, word_b, word_c, k: 10)
      m.analogies(k, word_a, word_b, word_c).map(&:reverse).to_h
    end

    private

    # separate example by newlines
    # https://github.com/facebookresearch/fastText/issues/518
    def input_path(x)
      if x.is_a?(String)
        x
      else
        tempfile = Tempfile.new("fasttext")
        x.each do |xi|
          tempfile.write(xi)
          tempfile.write("\n")
        end
        tempfile.close
        tempfile.path
      end
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
fasttext-0.1.0 lib/fasttext/vectorizer.rb