require 'zlib'
require 'fileutils'
require 'sitemap_generator/helpers/number_helper'

module SitemapGenerator
  module Builder
    # Accumulates the XML for a single sitemap file, tracks its link count and
    # byte size, and writes it out through its location object.
    #
    # General Usage:
    #
    #   sitemap = SitemapFile.new(SitemapLocation.new(...))
    #   sitemap.add('/', { ... })  <- add a link to the sitemap
    #   sitemap.finalize!          <- flag the sitemap as frozen to protect it from further modification
    #   sitemap.write              <- write the sitemap out through its location
    #
    class SitemapFile
      include SitemapGenerator::Helpers::NumberHelper

      attr_reader :link_count, :filesize, :location, :news_count

      # === Options
      #
      # * opts - a location object (used as-is), or a Hash of options
      #   from which a SitemapGenerator::SitemapLocation will be created for you.
      def initialize(opts={})
        @location = opts.is_a?(Hash) ? SitemapGenerator::SitemapLocation.new(opts) : opts
        @link_count = 0
        @news_count = 0
        @xml_content = '' # XML urlset content

        # NOTE(review): the wrapper markup below (and the closing wrapper) is
        # empty in this copy of the file -- confirm that the XML declaration and
        # the opening/closing urlset tags were not stripped by accident.
        wrapper_markup = <<-HTML
        HTML

        # Collapse whitespace runs and remove padding around '>'. The
        # non-destructive methods are used on purpose: gsub!/strip! return nil
        # when they change nothing, so chaining them can raise NoMethodError.
        @xml_wrapper_start = wrapper_markup.gsub(/\s+/, ' ').gsub(/ *> */, '>').strip
        @xml_wrapper_end = %q[]

        # The wrapper bytes count towards the size of the file from the start.
        @filesize = bytesize(@xml_wrapper_start) + bytesize(@xml_wrapper_end)
        @written = false
        @reserved_name = nil # holds the name reserved from the namer
        @frozen = false # rather than actually freeze, use this boolean
      end

      # If a name has been reserved, use the last modified time from the file.
      # Otherwise return nil. We don't want to prematurely assign a name
      # for this sitemap if one has not yet been reserved, because we may
      # mess up the name-assignment sequence.
      #
      # Any StandardError raised while looking up the time (for example when
      # the file does not exist) results in nil.
      def lastmod
        File.mtime(location.path) if location.reserved_name?
      rescue StandardError
        nil
      end

      # Return true if no links have been added yet.
      def empty?
        @link_count == 0
      end

      # Return a boolean indicating whether the sitemap file can fit another link
      # of <tt>bytes</tt> bytes in size. You can also pass a string and the
      # bytesize will be calculated for you.
      #
      # The file size, link count and news count limits must all hold.
      def file_can_fit?(bytes)
        bytes = bytes.is_a?(String) ? bytesize(bytes) : bytes
        (@filesize + bytes) < SitemapGenerator::MAX_SITEMAP_FILESIZE &&
          @link_count < SitemapGenerator::MAX_SITEMAP_LINKS &&
          @news_count < SitemapGenerator::MAX_SITEMAP_NEWS
      end

      # Add a link to the sitemap file.
      #
      # If a link cannot be added, for example if the file is too large or the link
      # limit has been reached, a SitemapGenerator::SitemapFullError exception is raised.
      # This method does not finalize the sitemap itself in that case.
      #
      # If the Sitemap has already been finalized a SitemapGenerator::SitemapFinalizedError
      # exception is raised.
      #
      # Return the new link count.
      #
      # Call with:
      #   sitemap_url - a SitemapUrl instance
      #   sitemap, options - a Sitemap instance and options hash
      #   path, options - a path for the URL and options hash
      #
      # KJV: We should be using the host from the Location object if no host is
      # specified in the call to add(). The issue is noticeable when we add links
      # to a sitemap direct as in the following example:
      #   ls = SitemapGenerator::LinkSet.new(:default_host => 'http://abc.com')
      #   ls.sitemap_index.add('/link')
      # This raises a RuntimeError: Cannot generate a url without a host
      # Expected: the link added to the sitemap should use the host from its
      # location object if no host has been specified.
      def add(link, options={})
        raise SitemapGenerator::SitemapFinalizedError if finalized?

        sitemap_url = link.is_a?(SitemapUrl) ? link : SitemapUrl.new(link, options)
        xml = sitemap_url.to_xml
        raise SitemapGenerator::SitemapFullError unless file_can_fit?(xml)

        @news_count += 1 if sitemap_url.news?

        # Add the XML to the sitemap
        @xml_content << xml
        @filesize += bytesize(xml)
        @link_count += 1
      end

      # "Freeze" this object. Actually just flags it as frozen.
      #
      # A SitemapGenerator::SitemapFinalizedError exception is raised if the Sitemap
      # has already been finalized.
      def finalize!
        raise SitemapGenerator::SitemapFinalizedError if finalized?
        @frozen = true
      end

      # Return true if this sitemap has been flagged as frozen.
      def finalized?
        @frozen
      end

      # Write out the sitemap and free up memory.
      #
      # All the xml content in the instance is cleared, but attributes like
      # <tt>filesize</tt> are still available.
      #
      # A SitemapGenerator::SitemapError exception is raised if the file has
      # already been written.
      def write
        raise SitemapGenerator::SitemapError, "Sitemap already written!" if written?

        finalize! unless finalized?
        reserve_name
        @location.write(@xml_wrapper_start + @xml_content + @xml_wrapper_end)

        # Drop the buffered XML now that it has been handed to the location.
        @xml_content = @xml_wrapper_start = @xml_wrapper_end = ''
        puts summary if @location.verbose?
        @written = true
      end

      # Return true if this file has been written out to disk
      def written?
        @written
      end

      # Reserve a name from the namer unless one has already been reserved.
      # Safe to call more than once.
      def reserve_name
        @reserved_name ||= @location.reserve_name
      end

      # Return a boolean indicating whether a name has been reserved
      def reserved_name?
        !!@reserved_name
      end

      # Return a new instance of the sitemap file with the same options,
      # and the next name in the sequence.
      def new
        location = @location.dup
        # Drop the filename when the location has a namer.
        location.delete(:filename) if location.namer
        self.class.new(location)
      end

      # Return a summary string containing the (possibly truncated) public
      # path, the link count and the human-readable size reported by the
      # location.
      #
      # <tt>opts</tt> is accepted for interface compatibility and is not used.
      def summary(opts={})
        compressed_size = number_to_human_size(@location.filesize)
        path = ellipsis(@location.path_in_public, 47)
        "+ #{'%-47s' % path} #{'%10s' % @link_count} links / #{'%10s' % compressed_size}"
      end

      protected

      # Replace the last 3 characters of string with ... if the string is as big
      # or bigger than max.
      def ellipsis(string, max)
        if string.size >= max
          string[0, max - 3] + '...'
        else
          string
        end
      end

      # Return the bytesize length of the string. Ruby 1.8.6 compatible.
      def bytesize(string)
        string.respond_to?(:bytesize) ? string.bytesize : string.length
      end
    end
  end
end