module Crawlable
  # DSL for describing a site's sitemap and rendering it as sitemap XML.
  #
  # A single sitemap is held at class level (Sitemap.instance); the class
  # methods below delegate to it. The instance methods double as DSL
  # setters/getters: called with an argument they store it, called without
  # one they return the current value.
  #
  # Relies on ActiveSupport core extensions (extract_options!,
  # assert_valid_keys, reverse_merge!, blank?, titleize) and on Nokogiri,
  # which are expected to be loaded by the host application.
  class Sitemap
    class << self
      attr_accessor :instance

      # Builds a new sitemap from the given arguments/block and stores it as
      # the class-level instance.
      def define!(*args, &block)
        self.instance = new(*args, &block)
      end

      # Request path of the defined sitemap, or "" when none is defined.
      def path
        instance ? instance.path : ""
      end

      # Returns the file location (directory + sitemap path) when +path+
      # contains the sitemap path (case-insensitive), otherwise nil.
      # Returns nil when no sitemap has been defined; previously the empty
      # path produced a regexp that matched every request.
      def find(path, directory)
        return nil unless instance

        File.join(directory, self.path) if path =~ /#{Regexp.escape(self.path)}/i
      end

      # Evaluates the sitemap definition file (defaults to
      # config/sitemap.rb under Rails.root).
      #
      # NOTE(security): the file is executed as Ruby code via eval; only
      # point this at trusted, application-owned files.
      def parse!(path)
        path ||= File.join(::Rails.root, 'config/sitemap.rb')
        # File.read (unlike IO.read) never treats a leading "|" as a command;
        # passing the file name makes backtraces point at the definition file.
        eval(File.read(path), binding, path.to_s)
      end

      # Writes the defined sitemap to +to+, optionally gzip-compressed.
      def write(to, compress = false)
        instance.write(to, compress)
      end

      # Parses the definition file +from+ and writes the result to +to+.
      # The block parameter is accepted for compatibility and is unused.
      def process!(from, to, compress = false, &block)
        parse!(from)
        write(to, compress)
      end

      # Delegates to the defined sitemap; falls back to the regular class
      # representation when no sitemap is defined.
      def inspect
        instance ? instance.inspect : super
      end

      def to_xml
        instance.to_xml
      end

      # Empties and forgets the defined sitemap. Safe to call when no
      # sitemap has been defined.
      def clear
        instance.clear if instance
        self.instance = nil
      end
    end

    # Only writers are generated here: every reader is defined explicitly
    # below as a combined DSL getter/setter.
    attr_writer :links, :sitemap_host, :ping, :yahoo_app_id, :path, :stylesheet

    # @param args [Array] the sitemap host, optionally followed by an
    #   options hash whose keys name DSL methods on this object
    # @raise [RuntimeError] when no host ends up being defined
    def initialize(*args, &block)
      self.sitemap_host = args.shift
      options = args.extract_options!
      options.each do |key, value|
        send(key, value) if respond_to?(key)
      end
      instance_eval(&block) if block
      raise "Please define a host: 'Sitemap 'http://my-site.com' do ..." if sitemap_host.blank?
    end

    # Request path of the sitemap; defaults to "/sitemap.xml".
    def path(value = nil)
      @path = value if value
      @path ||= "/sitemap.xml"
    end

    def yahoo_app_id(string = nil)
      @yahoo_app_id = string unless string.nil?
      @yahoo_app_id
    end

    # Link entries registered so far (array of hashes, see #link).
    def links
      @links ||= []
    end

    # Host used to build absolute URLs. The first argument is stored as-is
    # (it used to be wrapped in an Array, which URI.join cannot handle).
    def sitemap_host(*args)
      @sitemap_host = args.first unless args.empty?
      @sitemap_host
    end

    # Location of an optional stylesheet referenced from the generated XML.
    def stylesheet(value = nil)
      @stylesheet = value if value
      @stylesheet
    end

    def ping(*args)
      @ping = args unless args.empty?
      @ping
    end

    # File path of the sitemap; defaults to "public/sitemap.xml". Reading
    # the value no longer resets a previously configured path.
    def sitemap_path(string = nil)
      @sitemap_path = string if string
      @sitemap_path ||= "public/sitemap.xml"
    end

    # Registers a link entry.
    #
    # @param path [String] path of the page, joined onto the host
    # @param args [Array] optional trailing options hash with :priority,
    #   :changes, :updated_at and :host (:sitemap_host is accepted as an
    #   alias of :host)
    # @return [Hash] the stored entry
    def link(path, *args, &block)
      options = args.extract_options!
      options.assert_valid_keys(:priority, :changes, :updated_at, :host, :sitemap_host)
      host = options[:host] || options[:sitemap_host] || sitemap_host
      options.reverse_merge!(
        :priority   => 0.5,
        :changes    => 'monthly',
        :updated_at => Time.now
      )

      result = {
        :host       => host,
        :path       => path,
        :url        => URI.join(host, path).to_s,
        :priority   => options[:priority],
        :changes    => options[:changes],
        :updated_at => options[:updated_at],
        :images     => []
      }
      links.push(result)
      # Evaluated after the push so nested `image` calls attach to this link.
      instance_eval(&block) if block_given?
      result
    end

    # Attaches an image to the most recently registered link.
    #
    # @param path [String] location of the image
    # @param args [Array] optional trailing options hash with :caption,
    #   :geo_location, :title and :license
    # @raise [RuntimeError] when no link has been registered yet
    def image(path, *args, &block)
      options = args.extract_options!
      options.assert_valid_keys(:caption, :geo_location, :title, :license)
      raise "image must be declared after (or inside) a link" if links.empty?

      result = {
        :path         => path,
        :caption      => options[:caption],
        :geo_location => options[:geo_location],
        :title        => options[:title],
        :license      => options[:license]
      }
      links.last[:images].push(result)
    end

    # Formats +date+ as a UTC timestamp ("YYYY-MM-DDTHH:MM:SS+00:00").
    # Uses getutc when available so the caller's object is not converted
    # to UTC in place (Time#utc modifies its receiver).
    def w3c_date(date)
      utc = date.respond_to?(:getutc) ? date.getutc : date.utc
      utc.strftime("%Y-%m-%dT%H:%M:%S+00:00")
    end

    # Renders all registered links (and their images) as sitemap XML.
    #
    # @return [String]
    def to_xml
      namespaces = {
        "xmlns"       => "http://www.sitemaps.org/schemas/sitemap/0.9",
        "xmlns:image" => "http://www.google.com/schemas/sitemap-image/1.1"
      }
      # The block takes the builder as a parameter, so `self` stays this
      # sitemap and its helper methods remain callable inside.
      builder = Nokogiri::XML::Builder.new do |xml|
        xml.urlset(namespaces) do
          links.each { |entry| append_url(xml, entry) }
        end
      end
      add_stylesheet_instruction(builder.to_xml)
    end

    # Writes the XML to +path+. With +compress+ the output is gzipped and
    # ".gz" is appended to the file name unless already present; the
    # caller's +path+ string is left untouched.
    #
    # @return [String] the path that was written
    def write(path, compress = false)
      unless compress
        File.open(path, 'wb') { |file| file.puts to_xml }
        return path
      end

      to = File.extname(path) == ".gz" ? path : "#{path}.gz"
      # Block form closes the gzip stream and the file even if to_xml raises.
      Zlib::GzipWriter.open(to) { |gz| gz.write to_xml }
      to
    end

    # Pings each search engine with the absolute, URL-escaped sitemap
    # location (host joined with #path) and prints the outcome per engine.
    # Failures are reported, never raised.
    def notify
      sitemap_url = URI.encode_www_form_component(URI.join(sitemap_host.to_s, path).to_s)
      app_id      = URI.encode_www_form_component(yahoo_app_id.to_s)
      engines = {
        :google         => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{sitemap_url}",
        :yahoo          => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{sitemap_url}&appid=#{app_id}",
        :ask            => "http://submissions.ask.com/ping?sitemap=#{sitemap_url}",
        :bing           => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{sitemap_url}",
        :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{sitemap_url}"
      }
      engines.each do |engine, url|
        begin
          fetch(url)
          puts "Successful ping of #{engine.to_s.titleize}"
        rescue Timeout::Error, StandardError => e
          puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect}"
        end
      end
    end

    # Forgets all registered links.
    def clear
      @links = nil
    end

    # Short summary of the sitemap. (The previous version returned an empty
    # string; this wording is a replacement, not a restoration.)
    def inspect
      "#<#{self.class.name} host=#{sitemap_host.inspect} path=#{path.inspect} links=#{links.size}>"
    end

    private

    # Appends one <url> element (including its images) for a link entry.
    def append_url(xml, entry)
      xml.url do
        # <loc> must be an absolute URL, so use the joined :url, not :path.
        xml.loc entry[:url]
        xml.lastmod w3c_date(entry[:updated_at]) if entry[:updated_at]
        xml.changefreq entry[:changes] if entry[:changes]
        xml.priority entry[:priority] if entry[:priority]
        entry[:images].each { |img| append_image(xml, img) }
      end
    end

    # Appends one <image:image> element; optional fields are skipped when unset.
    def append_image(xml, img)
      xml["image"].image do
        xml["image"].loc img[:path]
        xml["image"].caption img[:caption] if img[:caption]
        xml["image"].geo_location img[:geo_location] if img[:geo_location]
        xml["image"].title img[:title] if img[:title]
        xml["image"].license img[:license] if img[:license]
      end
    end

    # Inserts an xml-stylesheet processing instruction right after the XML
    # declaration when a stylesheet is configured; returns +xml+ unchanged
    # otherwise (or when no declaration is present).
    # NOTE(review): the MIME type is inferred here (text/css for *.css,
    # text/xsl otherwise) — confirm against the intended stylesheet kind.
    def add_stylesheet_instruction(xml)
      return xml unless stylesheet

      type = stylesheet.to_s =~ /\.css\z/i ? "text/css" : "text/xsl"
      instruction = %(<?xml-stylesheet type="#{type}" href="#{stylesheet}"?>)
      xml.sub(/\A<\?xml[^>]*\?>/) { |declaration| "#{declaration}\n#{instruction}" }
    end

    # Performs a GET on +url+ and closes the response. Prefers URI.open
    # (provided by open-uri on current Rubies) and falls back to Kernel#open
    # for older open-uri versions.
    def fetch(url)
      if URI.respond_to?(:open)
        URI.open(url) { |response| response.read }
      else
        open(url) { |response| response.read }
      end
    end
  end
end