lib/big_sitemap.rb in alexrabarts-big_sitemap-0.3.0 vs lib/big_sitemap.rb in alexrabarts-big_sitemap-0.3.1

- old
+ new

@@ -1,13 +1,12 @@ require 'uri' -require 'zlib' -require 'builder' +require 'big_sitemap/builder' require 'extlib' class BigSitemap DEFAULTS = { - :max_per_sitemap => 50000, + :max_per_sitemap => Builder::MAX_URLS, :batch_size => 1001, :path => 'sitemaps', :gzip => true, # opinionated @@ -28,10 +27,14 @@ @options = DEFAULTS.merge options # Use Rails' default_url_options if available @default_url_options = defined?(Rails) ? default_url_options : {} + if @options[:max_per_sitemap] <= 1 + raise ArgumentError, '":max_per_sitemap" must be greater than 1' + end + if @options[:url_options] @default_url_options.update @options[:url_options] elsif @options[:base_url] uri = URI.parse(@options[:base_url]) @default_url_options[:host] = uri.host @@ -77,75 +80,59 @@ return self end def generate for model, options in @sources - count_method = pick_method(model, COUNT_METHODS) - find_method = pick_method(model, FIND_METHODS) - raise ArgumentError, "#{model} must provide a count_for_sitemap class method" if count_method.nil? - raise ArgumentError, "#{model} must provide a find_for_sitemap class method" if find_method.nil? + with_sitemap(Extlib::Inflection::tableize(model.to_s)) do |sitemap| + count_method = pick_method(model, COUNT_METHODS) + find_method = pick_method(model, FIND_METHODS) + raise ArgumentError, "#{model} must provide a count_for_sitemap class method" if count_method.nil? + raise ArgumentError, "#{model} must provide a find_for_sitemap class method" if find_method.nil? - count = model.send(count_method) - num_sitemaps = 1 - num_batches = 1 + count = model.send(count_method) + num_sitemaps = 1 + num_batches = 1 - if count > @options[:batch_size] - num_batches = (count.to_f / @options[:batch_size].to_f).ceil - num_sitemaps = (count.to_f / @options[:max_per_sitemap].to_f).ceil - end - batches_per_sitemap = num_batches.to_f / num_sitemaps.to_f + if count > @options[:batch_size] + num_batches = (count.to_f / @options[:batch_size].to_f).ceil + num_sitemaps = (count.to_f / @options[:max_per_sitemap].to_f).ceil + end + batches_per_sitemap = num_batches.to_f / num_sitemaps.to_f - find_options = options.dup + find_options = options.dup - for sitemap_num in 1..num_sitemaps - # Work out the start and end batch numbers for this sitemap - batch_num_start = sitemap_num == 1 ? 1 : ((sitemap_num * batches_per_sitemap).ceil - batches_per_sitemap + 1).to_i - batch_num_end = (batch_num_start + [batches_per_sitemap, num_batches].min).floor - 1 + for sitemap_num in 1..num_sitemaps + # Work out the start and end batch numbers for this sitemap + batch_num_start = sitemap_num == 1 ? 1 : ((sitemap_num * batches_per_sitemap).ceil - batches_per_sitemap + 1).to_i + batch_num_end = (batch_num_start + [batches_per_sitemap, num_batches].min).floor - 1 - # Stream XML output to a file - filename = "sitemap_#{Extlib::Inflection::tableize(model.to_s)}" - filename << "_#{sitemap_num}" if num_sitemaps > 1 - - f = xml_open(filename) - - xml = Builder::XmlMarkup.new(:target => f) - xml.instruct! - xml.urlset(:xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9') do for batch_num in batch_num_start..batch_num_end offset = ((batch_num - 1) * @options[:batch_size]) limit = (count - offset) < @options[:batch_size] ? (count - offset - 1) : @options[:batch_size] find_options.update(:limit => limit, :offset => offset) if num_batches > 1 - model.send(find_method, find_options).each do |r| - last_mod_method = pick_method(r, TIMESTAMP_METHODS) - last_mod = last_mod_method.nil? ? Time.now : r.send(last_mod_method) + model.send(find_method, find_options).each do |record| + last_mod_method = pick_method(record, TIMESTAMP_METHODS) + last_mod = last_mod_method.nil? ? Time.now : record.send(last_mod_method) - param_method = pick_method(r, PARAM_METHODS) + param_method = pick_method(record, PARAM_METHODS) - xml.url do - location = defined?(Rails) ? - polymorphic_url(r) : - "#{root_url}/#{strip_leading_slash(options[:path])}/#{r.send(param_method)}" - xml.loc(location) + location = defined?(Rails) ? + polymorphic_url(record) : + "#{root_url}/#{strip_leading_slash(options[:path])}/#{record.send(param_method)}" - xml.lastmod(last_mod.strftime('%Y-%m-%d')) unless last_mod.nil? + change_frequency = options[:change_frequency] || 'weekly' + freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency - change_frequency = options[:change_frequency] || 'weekly' - xml.changefreq(change_frequency.is_a?(Proc) ? change_frequency.call(r) : change_frequency) + priority = options[:priority] + pri = priority.is_a?(Proc) ? priority.call(record) : priority - priority = options[:priority] - unless priority.nil? - xml.priority(priority.is_a?(Proc) ? priority.call(r) : priority) - end - end + sitemap.add_url!(location, last_mod, freq, pri) end end end - - f.close end - end generate_sitemap_index return self @@ -191,10 +178,29 @@ end end private + def with_sitemap(name, options={}) + options[:index] = name == 'index' + options[:filename] = "#{@file_path}/sitemap_#{name}" + options[:max_urls] = @options[:max_per_sitemap] + + unless options[:gzip] = @options[:gzip] + options[:indent] = 2 + end + + sitemap = Builder.new(options) + + begin + yield sitemap + ensure + sitemap.close! + @sitemap_files.concat sitemap.paths! + end + end + def strip_leading_slash(str) str.sub(/^\//, '') end def pick_method(model, candidates) @@ -206,43 +212,18 @@ end end method end - def xml_open(filename) - filename << '.xml' - filename << '.gz' if @options[:gzip] - - file = File.open("#{@file_path}/#{filename}", 'w+') - - @sitemap_files << file.path - - writer = @options[:gzip] ? Zlib::GzipWriter.new(file) : file - - if block_given? - yield writer - writer.close - end - - writer - end - def url_for_sitemap(path) "#{root_url}/#{File.basename(path)}" end # Create a sitemap index document def generate_sitemap_index - xml_open 'sitemap_index' do |file| - xml = Builder::XmlMarkup.new(:target => file) - xml.instruct! - xml.sitemapindex(:xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9') do - for path in @sitemap_files[0..-2] - xml.sitemap do - xml.loc(url_for_sitemap(path)) - xml.lastmod(Time.now.strftime('%Y-%m-%d')) - end - end + with_sitemap 'index' do |sitemap| + for path in @sitemap_files + sitemap.add_url!(url_for_sitemap(path), File.stat(path).mtime) end end end end