# frozen_string_literal: true

require 'enumerator'
require 'net/http'
require 'uri'

# Raised when a site cannot be checked at all (for example, when it has no
# sitemap.xml). Derives from StandardError so that a plain `rescue` handles it.
class SEOException < StandardError
end

# Crawls every URL listed in a site's /sitemap.xml and prints a report of
# common SEO problems: unreachable pages, missing or duplicated titles and
# descriptions, numeric IDs in URLs, keyword-stuffed URLs and deeply nested
# paths.
#
# Usage:
#   SEOChecker.new('http://example.com', :batch_size => 10, :interval_time => 1).check
class SEOChecker
  # <loc> entries of a sitemap; surrounding whitespace is not part of the URL.
  LOC_PATTERN = %r{<loc>\s*(.*?)\s*</loc>}im
  # Contents of the <head> element (the optional-attributes group keeps
  # <header> from matching).
  HEAD_PATTERN = %r{<head(?:\s[^>]*)?>(.*?)</head>}im
  # Contents of the <title> element.
  TITLE_PATTERN = %r{<title(?:\s[^>]*)?>(.*?)</title>}im
  # <meta name="description" content="..."> with the two attributes in
  # either order; capture 1 or capture 2 holds the description.
  DESCRIPTION_PATTERN = %r{
    <meta\s+name=["']description["']\s+content=["'](.*?)["']\s*/?>
    |
    <meta\s+content=["'](.*?)["']\s+name=["']description["']\s*/?>
  }imx

  # How many offending URLs are listed per problem before "and ...".
  REPORT_SAMPLE_SIZE = 5
  # A path segment with more than this many dash-separated words is flagged.
  MAX_KEYWORDS = 5
  # A URL that splits on "/" into more than this many pieces is flagged.
  MAX_URL_SEGMENTS = 8

  # @param url [String] any URL on the site; only scheme/host/port are used,
  #   the path is replaced by /sitemap.xml
  # @param options [Hash]
  # @option options [#to_i] :batch_size number of pages fetched between
  #   pauses; when omitted (or not positive) all pages form a single batch
  # @option options [#to_i] :interval_time seconds to sleep after each batch
  #   (negative values are treated as 0)
  def initialize(url, options = {})
    @url = url
    @locations = []
    @titles = {}
    @descriptions = {}
    @id_urls = {}
    @excessive_keywords = []
    @nesting_subdirectories = []
    @no_titles = []
    @no_descriptions = []
    @unreachables = []
    @batch_size = options[:batch_size] ? options[:batch_size].to_i : nil
    # sleep raises ArgumentError on a negative duration, so clamp at zero.
    @interval_time = [options[:interval_time].to_i, 0].max
  end

  # Runs the whole check: reads the sitemap, inspects every page and prints
  # the report. An SEOException is reported on stdout instead of propagating.
  def check
    check_sitemap
    check_location
    report
  rescue SEOException => e
    puts e.message
  end

  # Downloads /sitemap.xml and stores every <loc> URL in @locations.
  #
  # @raise [SEOException] when the sitemap request is not answered with a
  #   2xx response
  def check_sitemap
    # TODO: allow manual sitemap file
    uri = URI.parse(@url)
    uri.path = '/sitemap.xml'
    response = get_response(uri)
    raise SEOException, "Error: There is no sitemap.xml." unless response.is_a?(Net::HTTPSuccess)

    @locations = response.body.to_s.scan(LOC_PATTERN).flatten
  end

  # Fetches every location in batches, sleeping @interval_time seconds after
  # each batch, and records the findings for each page.
  def check_location
    # each_slice(0) raises ArgumentError, so an empty sitemap is a no-op and
    # a missing or non-positive batch size means "everything in one batch".
    return if @locations.empty?

    @batch_size = @locations.size unless @batch_size && @batch_size > 0
    @locations.each_slice(@batch_size) do |batch_locations|
      batch_locations.each { |location| check_page(location) }
      sleep(@interval_time)
    end
  end

  private

  # Performs a GET request identifying itself as "seo-checker".
  #
  # @param uri [URI::HTTP]
  # @return [Net::HTTPResponse]
  def get_response(uri)
    http = Net::HTTP.new(uri.host, uri.port)
    # Without this an https:// URL is spoken to in plain HTTP and fails.
    http.use_ssl = (uri.scheme == 'https')
    request = Net::HTTP::Get.new(uri.request_uri)
    request["User-Agent"] = "seo-checker"
    http.request(request)
  end

  # Fetches one location, returning nil instead of raising so that a single
  # malformed URL or network failure does not abort the whole crawl.
  def fetch(location)
    get_response(URI.parse(location))
  rescue StandardError
    nil
  end

  # Inspects one page. Anything other than a 2xx response (redirects
  # included) is recorded as unreachable and not inspected further.
  def check_page(location)
    response = fetch(location)
    unless response.is_a?(Net::HTTPSuccess)
      @unreachables << location
      return
    end

    if response.body =~ HEAD_PATTERN
      head = Regexp.last_match(1)
      check_title(head, location)
      check_description(head, location)
    else
      # No <head> at all means neither a title nor a description.
      @no_titles << location
      @no_descriptions << location
    end
    check_url(location)
  end

  # Groups the location under its page title, or records it as untitled
  # when the <title> element is missing or blank.
  def check_title(header_string, location)
    title = Regexp.last_match(1).strip if header_string =~ TITLE_PATTERN
    if title && !title.empty?
      (@titles[title] ||= []) << location
    else
      @no_titles << location
    end
  end

  # Groups the location under its meta description, or records it as
  # lacking one.
  def check_description(header_string, location)
    if header_string =~ DESCRIPTION_PATTERN
      description = Regexp.last_match(1) || Regexp.last_match(2)
      (@descriptions[description] ||= []) << location
    else
      @no_descriptions << location
    end
  end

  # Flags URL-shape problems: numeric IDs, too many dash-separated keywords
  # in one segment, and too many path segments.
  def check_url(location)
    items = location.split('/')

    numeric_segment = items.any? { |item| item =~ /\A\d+\z/ }
    numeric_page = items.last.to_s =~ /\A\d+\.html?/
    if numeric_segment || numeric_page
      # Keyed by the URL with "/<digits>/" collapsed, so URLs differing only
      # in such an ID overwrite each other and are reported once.
      @id_urls[location.gsub(%r{/\d+/}, 'id')] = location
    end

    @excessive_keywords << location if items.any? { |item| item.split('-').size > MAX_KEYWORDS }
    @nesting_subdirectories << location if items.size > MAX_URL_SEGMENTS
  end

  # Prints every non-empty finding to stdout.
  def report
    report_group(@unreachables, 'are unreachable') unless @unreachables.empty?
    report_group(@no_titles, 'have no title') unless @no_titles.empty?
    report_group(@no_descriptions, 'have no description') unless @no_descriptions.empty?

    @titles.each do |title, locations|
      report_group(locations, "have the same title '#{title}'") if locations.size > 1
    end
    @descriptions.each do |description, locations|
      report_group(locations, "have the same description '#{description}'") if locations.size > 1
    end

    report_group(@id_urls.values, 'use ID number in URL') unless @id_urls.empty?
    report_group(@excessive_keywords, 'use excessive keywords in URL') unless @excessive_keywords.empty?
    unless @nesting_subdirectories.empty?
      report_group(@nesting_subdirectories, 'have deep nesting of subdirectories in URL')
    end
  end

  # Prints up to REPORT_SAMPLE_SIZE locations, an "and ..." marker when more
  # exist, and the problem they share.
  def report_group(locations, problem)
    more = 'and ...' if locations.size > REPORT_SAMPLE_SIZE
    print "#{locations.first(REPORT_SAMPLE_SIZE).join(",\n")} #{more} #{problem}.\n\n"
  end
end