require 'builder'
require 'action_view'
# A LinkSet provisions a bunch of links to sitemap files. It also writes the index file
# which lists all the sitemap files written.
module SitemapGenerator
class LinkSet
# NumberHelper is mixed in only for formatting the link total printed by `create`.
include ActionView::Helpers::NumberHelper # for number_with_delimiter
# Readers only: the matching writers are defined by hand further down so that
# a new value is also pushed onto the sitemap and index objects.
attr_reader :default_host, :public_path, :sitemaps_path
# sitemap is the sitemap file links are currently being added to;
# sitemap_index is the index file that sitemaps are added to.
attr_accessor :sitemap, :sitemap_index
# verbose: when truthy, summaries and ping results are printed with puts.
# yahoo_app_id: sent as the appid of the Yahoo ping; Yahoo is skipped when unset.
attr_accessor :verbose, :yahoo_app_id
# Evaluate the sitemap config block and write all sitemaps.
#
# The block is handed to a SitemapGenerator::Interpreter built around this
# LinkSet. The Sitemap Interpreter includes the URL helpers and API methods
# that the block argument to `add_links` is evaluated within.
#
# If the index has already been finalized, a new index and a new sitemap are
# started before the block runs. Afterwards the sitemap still in progress is
# added to the index and the index is finalized. When `verbose` is set, the
# summaries and a final stats line (links / sitemaps / elapsed time) are
# printed.
#
# TODO: Refactor so that we can have multiple instances
# of LinkSet.
def create(&block)
  require 'sitemap_generator/interpreter'

  started_at = Time.now

  if sitemap_index.finalized?
    # Replace the index first: new_sitemap_path is derived from the index.
    self.sitemap_index = SitemapGenerator::Builder::SitemapIndexFile.new(@public_path, sitemap_index_path)
    self.sitemap = SitemapGenerator::Builder::SitemapFile.new(@public_path, new_sitemap_path)
  end

  SitemapGenerator::Interpreter.new(self, &block)

  # The sitemap being filled when the block returns has not been listed yet.
  if !sitemap.finalized?
    sitemap_index.add(sitemap)
    puts sitemap.summary if verbose
  end

  sitemap_index.finalize!
  finished_at = Time.now
  return unless verbose

  puts sitemap_index.summary
  link_total = number_with_delimiter(sitemap_index.total_link_count)
  file_total = sitemap_index.sitemaps.size
  elapsed = "%dm%02ds" % (finished_at - started_at).divmod(60)
  puts "\nSitemap stats: #{link_total} links / #{file_total} sitemaps / #{elapsed}"
end
# Constructor
#
# public_path (optional) full path to the directory to write sitemaps in.
# Defaults to your Rails public/ directory (::Rails.root joined with
# 'public/') when nil.
#
# sitemaps_path (optional) path fragment within public to write sitemaps
# to e.g. 'en/'. Sitemaps are written to public_path + sitemaps_path
#
# default_host hostname including protocol to use in all sitemap links
# e.g. http://en.google.ca
#
# An index file and a first sitemap file are created right away.
def initialize(public_path = nil, sitemaps_path = nil, default_host = nil)
  @default_host  = default_host
  @sitemaps_path = sitemaps_path
  @public_path   = public_path.nil? ? File.join(::Rails.root, 'public/') : public_path

  # The host is not handed to these objects here; `add_links` sets it on them.
  # The index must exist before the sitemap because new_sitemap_path reads it.
  self.sitemap_index = SitemapGenerator::Builder::SitemapIndexFile.new(@public_path, sitemap_index_path)
  self.sitemap = SitemapGenerator::Builder::SitemapFile.new(@public_path, new_sitemap_path)
end
# Entry point for users.
#
# Called within the user's eval'ed sitemap config file. Add links to sitemap files
# passing a block.
#
# Before yielding, the default host is copied onto the index and the current
# sitemap, and the sitemap is seeded with two links: the root path '/' and the
# sitemap index itself. Yields this LinkSet to the block.
#
# Raises ArgumentError when default_host is blank.
#
# TODO: Refactor. The call chain is confusing and convoluted here.
def add_links
  if default_host.blank?
    raise ArgumentError, "Default hostname not set"
  end

  # Index first, then the sitemap.
  sitemap_index.hostname = default_host
  sitemap.hostname = default_host

  # Default links every sitemap starts with.
  ['/', sitemap_index].each do |seed|
    sitemap.add(seed, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0)
  end

  yield self
end
# Add a link to a Sitemap. If a new Sitemap is required, one will be created for
# you.
#
# link and options are passed straight through to the current sitemap's `add`,
# and its return value is returned.
#
# When the current sitemap raises a SitemapGenerator::SitemapError it is
# replaced by a new SitemapFile and the link is tried once more. If the error
# was a SitemapFullError, the full sitemap is first added to the index (and
# its summary printed when `verbose`).
#
# Raises SitemapGenerator::SitemapError if the brand-new sitemap rejects the
# link as well. Retrying again could never succeed and would loop forever,
# creating a new sitemap on every pass.
def add(link, options={})
  replaced = false
  begin
    self.sitemap.add(link, options)
  rescue SitemapGenerator::SitemapError => e
    # Only one replacement per call: a fresh sitemap that still refuses the
    # link will refuse it every time.
    raise if replaced

    if e.is_a?(SitemapGenerator::SitemapFullError)
      self.sitemap_index.add(self.sitemap)
      puts self.sitemap.summary if verbose
    end
    self.sitemap = SitemapGenerator::Builder::SitemapFile.new(public_path, new_sitemap_path, default_host)
    replaced = true
    retry
  end
end
# Ping search engines.
#
# Requests each engine's ping URL, passing the CGI-escaped full URL of the
# sitemap index. Yahoo is skipped unless `yahoo_app_id` is set. A failure is
# rescued per engine so the remaining engines are still pinged. Results (and a
# note about the missing Yahoo AppID) are printed only when `verbose` is set.
#
# @see http://en.wikipedia.org/wiki/Sitemap_index
def ping_search_engines
  # CGI.escape is used below; do not rely on something else having loaded it.
  require 'cgi'
  require 'open-uri'
  sitemap_index_url = CGI.escape(self.sitemap_index.full_url)
  search_engines = {
    :google => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{sitemap_index_url}",
    :yahoo => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{sitemap_index_url}&appid=#{yahoo_app_id}",
    :ask => "http://submissions.ask.com/ping?sitemap=#{sitemap_index_url}",
    :bing => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{sitemap_index_url}",
    :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{sitemap_index_url}"
  }
  puts "\n" if verbose
  search_engines.each do |engine, link|
    next if engine == :yahoo && !self.yahoo_app_id
    begin
      # Open through the URI object rather than Kernel#open: since Ruby 3.0
      # Kernel#open no longer fetches URLs. The block form closes the response.
      URI.parse(link).open { |_response| }
      puts "Successful ping of #{engine.to_s.titleize}" if verbose
    rescue Timeout::Error, StandardError => e
      puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect} (URL #{link})" if verbose
    end
  end
  if !self.yahoo_app_id && verbose
    puts "\n"
    # The gsub strips the leading whitespace of every heredoc line.
    puts <<-END.gsub(/^\s+/, '')
      To ping Yahoo you require a Yahoo AppID. Add it to your config/sitemap.rb with:
      SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"
      For more information see http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
    END
  end
end
# Total number of links, as reported by the sitemap index's total_link_count.
def link_count
  sitemap_index.total_link_count
end
# Set the default host and pass it on as the hostname of the index and of the
# current sitemap. A file that is already finalized is left untouched.
def default_host=(value)
  @default_host = value

  index_file = sitemap_index
  index_file.hostname = value unless index_file.finalized?

  current_file = sitemap
  current_file.hostname = value unless current_file.finalized?
end
# Set the public path and pass it on to the index and to the current sitemap.
# A file that is already finalized is left untouched.
def public_path=(value)
  @public_path = value

  index_file = sitemap_index
  index_file.public_path = value unless index_file.finalized?

  current_file = sitemap
  current_file.public_path = value unless current_file.finalized?
end
# Set the sitemaps path fragment and recompute the file paths that depend on
# it: the index gets sitemap_index_path and the current sitemap gets
# new_sitemap_path. A file that is already finalized is left untouched.
def sitemaps_path=(value)
  @sitemaps_path = value

  index_file = sitemap_index
  index_file.sitemap_path = sitemap_index_path unless index_file.finalized?

  current_file = sitemap
  current_file.sitemap_path = new_sitemap_path unless current_file.finalized?
end
protected
# Return the path for the next sitemap file: sitemaps_path (or '' when it is
# not set) joined with a numbered filename.
#
# The number is one more than the count of sitemaps already in the index.
def new_sitemap_path
  directory = sitemaps_path || ''
  number = sitemap_index.sitemaps.length + 1
  File.join(directory, "sitemap#{number}.xml.gz")
end
# Return the path of the sitemap index file: sitemaps_path (or '' when it is
# not set) joined with the fixed index filename.
#
# At the moment we only support one index file which can link to
# up to 50,000 sitemap files.
def sitemap_index_path
  directory = sitemaps_path || ''
  File.join(directory, 'sitemap_index.xml.gz')
end
end
end