Sha256: 723d87a42b9b81d80e4dfe4631f3c697dd4d3fd319158152e5be262c41cb52c8
Contents?: true
Size: 1.46 KB
Versions: 1
Compression:
Stored size: 1.46 KB
Contents
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from fasttext import load_model
from fasttext import tokenize
import sys
import time
import tempfile
import argparse


def get_word_vector(data, model):
    """Benchmark `get_word_vector` over every token of a text file.

    Reads and tokenizes the whole file at `data`, loads the fastText
    model at `model`, then times one `get_word_vector` call per token,
    printing read/vectorize wall-clock times to stdout and a progress
    fraction to stderr every 10000 tokens.

    Args:
        data: path to a plain-text file to tokenize.
        model: path to a fastText model file accepted by `load_model`.
    """
    t1 = time.time()
    print("Reading")
    with open(data, 'r') as f:
        tokens = tokenize(f.read())
    t2 = time.time()
    print("Read TIME: " + str(t2 - t1))
    print("Read NUM : " + str(len(tokens)))

    # Distinct name for the model: the original reused `f` for both the
    # file handle and the model object, which is confusing to read.
    ft = load_model(model)

    # This is not equivalent to piping the data into print-word-vector,
    # because the data is tokenized first.
    n_tokens = len(tokens)  # hoisted: invariant inside the loop
    t3 = time.time()
    # enumerate(start=1) matches the original post-increment counter, so
    # the progress line fires at the same iterations with the same value.
    # `float(...)` was redundant: `__future__.division` makes `/` true division.
    for i, token in enumerate(tokens, start=1):
        ft.get_word_vector(token)
        if i % 10000 == 0:
            sys.stderr.write("\ri: " + str(i / n_tokens))
            sys.stderr.flush()
    t4 = time.time()
    print("\nVectoring: " + str(t4 - t3))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Simple benchmark for get_word_vector.'
    )
    parser.add_argument('model', help='A model file to use for benchmarking.')
    parser.add_argument('data', help='A data file to use for benchmarking.')
    args = parser.parse_args()
    get_word_vector(args.data, args.model)
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
fasttext-0.1.0 | vendor/fastText/python/benchmarks/get_word_vector.py |