require 'builder'
require 'action_view'
# A LinkSet provisions a bunch of links to sitemap files. It also writes the index file
# which lists all the sitemap files written.
module SitemapGenerator
class LinkSet
include ActionView::Helpers::NumberHelper # for number_with_delimiter
attr_accessor :default_host, :public_path, :sitemaps_path
attr_accessor :sitemap, :sitemaps, :sitemap_index
attr_accessor :verbose, :yahoo_app_id
# Evaluate the sitemap config file and write all sitemaps.
#
# When +public_path+ has not been set it defaults to the +public/+
# directory under +Rails.root+. After the interpreter has run and the
# sitemaps have been finalized, a one-line summary (link count, file
# count, elapsed time) is printed if +verbose+ is set.
#
# This should be refactored so that we can have multiple instances
# of LinkSet.
def create
  # Loaded lazily, at call time rather than at file load time.
  require 'sitemap_generator/interpreter'

  if self.public_path.nil?
    self.public_path = File.join(::Rails.root, 'public/')
  end

  started_at = Time.now
  SitemapGenerator::Interpreter.run
  finalize!
  elapsed = Time.now - started_at

  return unless verbose

  # divmod(60) splits the elapsed seconds into [minutes, seconds].
  minutes, seconds = elapsed.divmod(60)
  duration = "%dm%02ds" % [minutes, seconds]
  puts "\nSitemap stats: #{number_with_delimiter(self.link_count)} links / #{self.sitemaps.size} files / " + duration
end
# Build a new link set with no completed sitemaps.
#
# public_path   (optional) full path to the directory to write sitemaps in.
#               Defaults to your Rails public/ directory.
#
# sitemaps_path (optional) path fragment within public to write sitemaps
#               to e.g. 'en/'. Sitemaps are written to public_path + sitemaps_path
#
# default_host  hostname including protocol to use in all sitemap links
#               e.g. http://en.google.ca
def initialize(public_path = nil, sitemaps_path = nil, default_host = nil)
  # Go through the accessors (note the explicit self) rather than
  # assigning to the like-named local parameters.
  self.public_path, self.sitemaps_path, self.default_host = public_path, sitemaps_path, default_host

  # Completed sitemaps accumulate here as they are finalized.
  self.sitemaps = []
end
# Total number of links across all completed sitemaps, i.e. the sum of
# +link_count+ over every entry in +sitemaps+. Returns 0 when there are none.
def link_count
  total = 0
  self.sitemaps.each { |completed| total += completed.link_count }
  total
end
# Called within the user's eval'ed sitemap config file. Add links to sitemap files
# passing a block. The block receives a Mapper wrapping this link set, and
# the block's value is returned.
#
# Raises ArgumentError when default_host is blank.
#
# TODO: Refactor. The call chain is confusing and convoluted here.
def add_links
  if default_host.blank?
    raise ArgumentError, "Default hostname not set"
  end

  # I'd rather have these calls in create but we have to wait
  # for default_host to be set by the user's sitemap config
  new_sitemap
  add_default_links

  mapper = Mapper.new(self)
  yield mapper
end
# Called from Mapper.
#
# Add a link to the current sitemap. When the current sitemap does not
# accept the link (its << returns a falsy value), a new sitemap is started
# and the link is added to that one instead.
def add_link(link)
  return if self.sitemap << link

  new_sitemap
  self.sitemap << link
end
# Add the current sitemap to the sitemaps Array and
# start a new sitemap.
#
# The sitemap index and the first sitemap are created on demand.
# If the current sitemap is nil or empty it is not added.
def new_sitemap
  self.sitemap_index ||= SitemapGenerator::Builder::SitemapIndexFile.new(public_path, sitemap_index_path, default_host)
  self.sitemap ||= SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)

  # Nothing to roll over yet.
  return if self.sitemap.empty?

  # Mark the sitemap as complete and add it to the sitemap index
  finished = self.sitemap
  finished.finalize!
  self.sitemap_index << Link.generate(finished)
  self.sitemaps << finished
  show_progress(finished) if verbose

  # The finished sitemap must already be in +sitemaps+ here, because
  # new_sitemap_path numbers the file from the length of that array.
  self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)
end
# Report progress line.
#
# Prints the sitemap's path and link count, followed by its uncompressed
# size (sitemap.filesize) and the size of the file at sitemap.full_path,
# both formatted with number_to_human_size.
def show_progress(sitemap)
  # File.size? yields nil when the file is missing or zero bytes long.
  sizes = [sitemap.filesize, File.size?(sitemap.full_path)].map { |bytes| number_to_human_size(bytes) }
  puts "+ #{sitemap.sitemap_path} #{sitemap.link_count} links / #{sizes.first} / #{sizes.last} gzipped"
end
# Finalize all sitemap files
#
# Rolls over the current sitemap via new_sitemap and then finalizes the
# sitemap index. Returns whatever the index's finalize! returns.
def finalize!
  new_sitemap
  # Read the index only after new_sitemap, which creates it on demand.
  index = self.sitemap_index
  index.finalize!
end
# Ping search engines.
#
# Requests each engine's ping URL with the CGI-escaped URL of the sitemap
# index. A failed ping is never raised to the caller; it is only reported
# on stdout when +verbose+ is set. Yahoo is skipped unless +yahoo_app_id+
# is set, and in verbose mode a notice explaining how to set it is printed.
#
# @see http://en.wikipedia.org/wiki/Sitemap_index
def ping_search_engines
  # CGI and Timeout are referenced below, so load them explicitly instead
  # of relying on the host application having done so.
  require 'cgi'
  require 'timeout'
  require 'open-uri'

  sitemap_index_url = CGI.escape(self.sitemap_index.full_url)
  search_engines = {
    :google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{sitemap_index_url}",
    # The app id goes into a query string, so escape it like the sitemap URL.
    :yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{sitemap_index_url}&appid=#{CGI.escape(yahoo_app_id.to_s)}",
    :ask => "http://submissions.ask.com/ping?sitemap=#{sitemap_index_url}",
    :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{sitemap_index_url}",
    :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{sitemap_index_url}"
  }

  puts "\n" if verbose
  search_engines.each do |engine, link|
    next if engine == :yahoo && !self.yahoo_app_id
    begin
      # Kernel#open no longer fetches URLs on Ruby 3.0+ (it would look for a
      # local file named after the URL), so open the parsed URI directly.
      # The block form closes the response as soon as the block returns.
      URI.parse(link).open { |_response| nil }
      puts "Successful ping of #{engine.to_s.titleize}" if verbose
    rescue Timeout::Error, StandardError => e
      puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect} (URL #{link})" if verbose
    end
  end

  if !self.yahoo_app_id && verbose
    puts "\n"
    # The gsub strips the heredoc's leading indentation from every line.
    puts <<-END.gsub(/^\s+/, '')
      To ping Yahoo you require a Yahoo AppID. Add it to your config/sitemap.rb with:
      SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"
      For more information see http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
    END
  end
end
protected
# Add the two default links to the current sitemap: the site root ('/')
# and the sitemap index. Both use the current time as lastmod, a
# changefreq of 'always' and a priority of 1.0.
def add_default_links
  root_link = Link.generate('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
  self.sitemap << root_link

  index_link = Link.generate(self.sitemap_index, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
  self.sitemap << index_link
end
# Return the current sitemap filename with index.
#
# The index depends on the length of the sitemaps array: the file is
# numbered one higher than the count of completed sitemaps. The name is
# joined onto sitemaps_path, or onto an empty string when that is unset.
def new_sitemap_path
  directory = self.sitemaps_path || ''
  sequence = self.sitemaps.length + 1
  File.join(directory, "sitemap#{sequence}.xml.gz")
end
# Return the current sitemap index filename.
#
# The fixed name 'sitemap_index.xml.gz' is joined onto sitemaps_path, or
# onto an empty string when that is unset.
#
# At the moment we only support one index file which can link to
# up to 50,000 sitemap files.
def sitemap_index_path
  prefix = self.sitemaps_path
  prefix = '' unless prefix
  File.join(prefix, 'sitemap_index.xml.gz')
end
end
end