Sha256: 3b5cbd303dd304754652b745513fb24c01770fdb4e4c5114208cf4c233b58b86
Contents?: true
Size: 1.28 KB
Versions: 4
Compression:
Stored size: 1.28 KB
Contents
require 'sinatra' require 'uri' require 'mongo' require 'date' require 'time' require File.expand_path('../../src/language-detector', __FILE__) use Rack::MethodOverride # Tweets come from a MongoDB collection. uri = URI.parse(ENV['MONGOHQ_URL']) conn = Mongo::Connection.from_uri(ENV['MONGOHQ_URL']) db = conn.db(uri.path.gsub(/^\//, '')) coll = db["tweets"] DETECTOR = LanguageDetector.load_yaml("detector2.yaml") helpers do def partial(page, locals = {}) haml page, :layout => false, :locals => locals end end layout 'layout' get '/' do haml :index end post '/' do @sentence = nil if params[:sentence] @sentence = params[:sentence] @language = DETECTOR.classify(@sentence) == "majority" ? "English" : "Not English" end haml :index end get '/tweet' do @tweet = coll.find().limit(-1).skip(rand(coll.count())).first()['text'] @language = DETECTOR.classify(@tweet) == "majority" ? "English" : "Not English" @language = "Not English" if @tweet.split.select{ |c| c =~ /[^\x00-\x80]/ }.size > 1 # Use this if you want to check for non-Roman characters. Not necessary, but sometimes there are tweets consisting solely of non-Roman characters, in which case the classifier fails (since it currently removes all non-ASCII characters). haml :tweet, :layout => false end
Version data entries
4 entries across 4 versions & 1 rubygems