Sha256: bd864197634023896fa1b2d7bb56f9c030d5b9d607a82947c612d3850436d6a6
Contents?: true
Size: 1.4 KB
Versions: 3
Compression:
Stored size: 1.4 KB
Contents
module FastText
  # Model subclass that trains through Ext::Model (default `model: "skipgram"`)
  # and exposes nearest-neighbor and analogy queries on the trained model.
  class Vectorizer < Model
    # Baseline training options. #fit merges @options over these and then adds
    # the :input path, so caller-supplied options win over the defaults.
    # Frozen so the shared defaults cannot be mutated by accident.
    DEFAULT_OPTIONS = {
      lr: 0.5,
      lr_update_rate: 100,
      dim: 100,
      ws: 5,
      epoch: 5,
      min_count: 1,
      min_count_label: 0,
      neg: 5,
      word_ngrams: 1,
      loss: "ns",
      model: "skipgram",
      bucket: 2000000,
      minn: 3,
      maxn: 6,
      thread: 3,
      t: 0.0001,
      verbose: 2,
      pretrained_vectors: "",
      save_output: false,
      seed: 0,
      autotune_validation_file: "",
      autotune_metric: "f1",
      autotune_predictions: 1,
      autotune_duration: 60 * 5,
      autotune_model_size: ""
    }.freeze

    # Trains the model.
    #
    # @param x [String, #each] either a path to a training file (used as-is)
    #   or an enumerable of document strings, which are written to a temporary
    #   file, one document per line
    # @return whatever Ext::Model#train returns
    def fit(x)
      input = input_path(x)
      @m ||= Ext::Model.new
      m.train(DEFAULT_OPTIONS.merge(@options).merge(input: input))
    end

    # Looks up the nearest neighbors of +word+.
    #
    # @param word [String]
    # @param k [Integer] number of neighbors requested
    # @return [Hash] each pair returned by the native call, reversed and
    #   collected into a hash (presumably word => score; confirm against Ext::Model)
    def nearest_neighbors(word, k: 10)
      m.nearest_neighbors(word, k).map(&:reverse).to_h
    end

    # Runs an analogy query over three words.
    #
    # @param word_a [String]
    # @param word_b [String]
    # @param word_c [String]
    # @param k [Integer] number of results requested
    # @return [Hash] each pair returned by the native call, reversed and
    #   collected into a hash (presumably word => score; confirm against Ext::Model)
    def analogies(word_a, word_b, word_c, k: 10)
      m.analogies(k, word_a, word_b, word_c).map(&:reverse).to_h
    end

    private

    # Resolves the training input to a file path.
    #
    # A String is treated as an existing path and returned unchanged. Anything
    # else is iterated with #each and written to a temporary file with one
    # example per line.
    #
    # separate example by newlines
    # https://github.com/facebookresearch/fastText/issues/518
    def input_path(x)
      return x if x.is_a?(String)

      # Keep the Tempfile referenced from the instance: if only the path string
      # survived, the object could be garbage collected (which deletes the
      # file) before training has read it.
      @tempfile = Tempfile.new("fasttext")
      begin
        x.each do |xi|
          # replace newlines in document so each example stays on one line
          @tempfile.puts(xi.tr("\n", " "))
        end
      ensure
        # Close (flush) even if an element raises, so the handle is not leaked.
        @tempfile.close
      end
      @tempfile.path
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
fasttext-0.2.1 | lib/fasttext/vectorizer.rb |
fasttext-0.2.0 | lib/fasttext/vectorizer.rb |
fasttext-0.1.3 | lib/fasttext/vectorizer.rb |