lib/name-spotter.rb in name-spotter-0.2.1 vs lib/name-spotter.rb in name-spotter-0.2.2

- old
+ new

# Diff context from hunk "@@ -3,15 +3,33 @@" -- the load-time dependencies
# shown by the diff ("+" marks a line added in 0.2.2):
#   require "uri"
#   require "json"
#   require "nokogiri"
#   require "socket"
#   require "unicode_utils"
# + require 'unsupervised-language-detection'
#   require File.join(File.dirname(__FILE__), 'name-spotter', 'client')
#   Dir["#{File.dirname(__FILE__)}/name-spotter/**/*.rb"].each {|f| require f}

class NameSpotter
  # Number of words joined into one pseudo-"tweet" handed to the classifier.
  WORDS_PER_CHUNK = 15
  # Upper bound on how many chunks are classified per call.
  MAX_CHUNKS = 50
  # Fraction of sampled chunks that must be exceeded for an "English" verdict.
  ENGLISH_THRESHOLD = 0.5

  # Guesses whether +text+ is predominantly English.
  #
  # Whitespace-separated tokens containing at least one ASCII letter are
  # grouped into chunks of WORDS_PER_CHUNK words. A random sample of at most
  # MAX_CHUNKS chunks is passed to
  # UnsupervisedLanguageDetection.is_english_tweet?, which must already be
  # loaded (see the require list above).
  #
  # @param text [String, nil] text to inspect; nil is treated as empty text
  # @return [Boolean] true when strictly more than ENGLISH_THRESHOLD of the
  #   sampled chunks are classified as English; false otherwise, including
  #   when the text contains no alphabetic words at all
  def self.english?(text)
    words = text.to_s.split(/\s+/).grep(/[A-Za-z]/)
    chunks = words.each_slice(WORDS_PER_CHUNK).to_a
    # Nothing to classify: answer explicitly instead of dividing 0.0 by 0,
    # which yields NaN.
    return false if chunks.empty?

    sampled = chunks.sample(MAX_CHUNKS)
    english_count = sampled.count do |chunk|
      UnsupervisedLanguageDetection.is_english_tweet?(chunk.join(" "))
    end
    english_count.to_f / sampled.size > ENGLISH_THRESHOLD
  end

  # @param client the object whose +find+ method is used to look up names
  def initialize(client)
    @client = client
  end
end

# Diff context from hunk "@@ -20,9 +38,10 @@". Both definitions are cut off by
# the hunk boundaries (the first method's `def` line and the body of
# `to_text` are outside the visible diff), so the lines are kept verbatim:
#   names = @client.find(text)
#   names = names.map{ |n| n.to_hash }
#   return { names: names } unless format
#   format == "json" ? to_json(names) : to_xml(names)
#   end
# + private
#   def to_text(input)
#   input