Sha256: e1cda3e52bcd487b501b326b7ebbfa2c7810211b666dec33d12fb08c841b236a
Contents?: true
Size: 408 Bytes
Versions: 2
Compression:
Stored size: 408 Bytes
Contents
# Train an unsupervised language classifier for tweets from character
# trigrams, using a corpus of 5000 tweets, and save the trained detector
# to a YAML file.
require_relative './language-detector'

# Training corpus, read one entry per line.
TWEETS_FILENAME = "datasets/tweets_5000.txt"

# Read every line of the corpus, then normalize each one.
# NOTE(review): String#normalize is not core Ruby; presumably it is added by
# ./language-detector -- confirm.
raw_tweets = File.readlines(TWEETS_FILENAME)
normalized_tweets = raw_tweets.map { |line| line.normalize }

# Trigram model (ngram_size of 3).
tweet_detector = LanguageDetector.new(ngram_size: 3)

# NOTE(review): the meaning of the first argument (30) is not visible here;
# check LanguageDetector#train before changing it.
tweet_detector.train(30, normalized_tweets)

# Write the trained detector out as YAML.
tweet_detector.yamlize("english-tweet-detector.yaml")
Version data entries
2 entries across 2 versions & 1 rubygems