Sha256: 04867c48b2e697f13872f87fd4c328364481193ee18929cd3878de2e49fb7836

Contents?: true

Size: 1.41 KB

Versions: 5

Compression:

Stored size: 1.41 KB

Contents

module FastText
  # Model subclass that trains on raw text and exposes nearest-neighbor and
  # analogy queries on the trained model.
  #
  # Relies on helpers that are not defined in this file (`build_args`, `m`,
  # `Ext::Model`); they are presumably provided by Model and the native
  # extension.
  class Vectorizer < Model
    # Default training arguments handed to `build_args` by #fit.
    # NOTE(review): this hash is not frozen; `build_args` is defined outside
    # this file, so confirm it does not mutate its argument before freezing.
    DEFAULT_OPTIONS = {
      lr: 0.5,
      lr_update_rate: 100,
      dim: 100,
      ws: 5,
      epoch: 5,
      min_count: 1,
      min_count_label: 0,
      neg: 5,
      word_ngrams: 1,
      loss: "ns",
      model: "skipgram",
      bucket: 2_000_000,
      minn: 3,
      maxn: 6,
      thread: 3,
      t: 0.0001,
      verbose: 2,
      pretrained_vectors: "",
      save_output: false,
      seed: 0,
      autotune_validation_file: "",
      autotune_metric: "f1",
      autotune_predictions: 1,
      autotune_duration: 60 * 5,
      autotune_model_size: ""
    }

    # Trains the model.
    #
    # @param x [String, #each] a String is used as the training file path
    #   as-is; any other object is iterated with `each` and every yielded
    #   document is written as one line of a temporary training file
    # @return whatever the underlying `train` call returns
    def fit(x)
      tempfile = nil
      @m ||= Ext::Model.new
      args = build_args(DEFAULT_OPTIONS)
      args.input, tempfile = input_path(x)
      m.train(args)
    ensure
      # Remove the temporary training file as soon as training finishes (or
      # fails) instead of leaving it on disk until the Tempfile is
      # garbage-collected.
      tempfile.close! if tempfile
    end

    # Queries the trained model for the neighbors of a word.
    #
    # @param word [String] query word
    # @param k [Integer] number of neighbors requested from the model
    # @return [Hash] each pair returned by the model, reversed and collected
    #   into a Hash (presumably word => score; confirm against the extension)
    def nearest_neighbors(word, k: 10)
      m.nearest_neighbors(word, k).map(&:reverse).to_h
    end

    # Queries the trained model for analogies over three words.
    #
    # @param word_a [String]
    # @param word_b [String]
    # @param word_c [String]
    # @param k [Integer] number of results requested from the model
    # @return [Hash] each pair returned by the model, reversed and collected
    #   into a Hash (presumably word => score; confirm against the extension)
    def analogies(word_a, word_b, word_c, k: 10)
      m.analogies(k, word_a, word_b, word_c).map(&:reverse).to_h
    end

    private

    # Resolves the training input to a file path.
    #
    # separate example by newlines
    # https://github.com/facebookresearch/fastText/issues/518
    #
    # @param x [String, #each] a path, or a collection of documents
    # @return [Array] `[path, tempfile]`; `tempfile` is nil when `x` is
    #   already a path, otherwise it is the closed Tempfile backing `path`,
    #   which the caller is responsible for deleting
    def input_path(x)
      return [x, nil] if x.is_a?(String)

      tempfile = Tempfile.new("fasttext")
      begin
        x.each do |xi|
          tempfile.write(xi.tr("\n", " ")) # replace newlines in document
          tempfile.write("\n")
        end
        tempfile.close
      rescue StandardError
        # Do not leave a half-written temp file behind if a document fails.
        tempfile.close!
        raise
      end
      [tempfile.path, tempfile]
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
fasttext-0.4.0 lib/fasttext/vectorizer.rb
fasttext-0.3.0 lib/fasttext/vectorizer.rb
fasttext-0.2.4 lib/fasttext/vectorizer.rb
fasttext-0.2.3 lib/fasttext/vectorizer.rb
fasttext-0.2.2 lib/fasttext/vectorizer.rb