lib/big_sitemap.rb in big_sitemap-0.5.1 vs lib/big_sitemap.rb in big_sitemap-0.8.1

- old
+ new

@@ -1,9 +1,9 @@ require 'uri' +require 'fileutils' + require 'big_sitemap/builder' -require 'extlib' -require 'action_controller' if defined? Rails class BigSitemap DEFAULTS = { :max_per_sitemap => Builder::MAX_URLS, :batch_size => 1001, @@ -20,17 +20,14 @@ COUNT_METHODS = [:count_for_sitemap, :count] FIND_METHODS = [:find_for_sitemap, :all] TIMESTAMP_METHODS = [:updated_at, :updated_on, :updated, :created_at, :created_on, :created] PARAM_METHODS = [:to_param, :id] - include ActionController::UrlWriter if defined? Rails - def initialize(options) @options = DEFAULTS.merge options - # Use Rails' default_url_options if available - @default_url_options = defined?(Rails) ? default_url_options : {} + @default_url_options = options.delete(:default_url_options) || {} if @options[:max_per_sitemap] <= 1 raise ArgumentError, '":max_per_sitemap" must be greater than 1' end @@ -47,17 +44,11 @@ if @options[:batch_size] > @options[:max_per_sitemap] raise ArgumentError, '":batch_size" must be less than ":max_per_sitemap"' end - @options[:document_root] ||= begin - if defined? Rails - "#{Rails.root}/public" - elsif defined? Merb - "#{Merb.root}/public" - end - end + @options[:document_root] ||= document_root unless @options[:document_root] raise ArgumentError, 'Document root must be specified with the ":document_root" option' end @@ -67,52 +58,106 @@ @sources = [] @sitemap_files = [] end def add(model, options={}) - options[:path] ||= Extlib::Inflection.tableize(model.to_s) + options[:path] ||= table_name(model) + options[:filename] ||= file_name(model) + options[:primary_column] ||= 'id' if model.new.respond_to?('id') + options[:partial_update] = @options[:partial_update] && options[:partial_update] != false @sources << [model, options.dup] - return self + self end + def add_static(url, time = nil, frequency = nil, priority = nil) + @static_pages ||= [] + @static_pages << [url, time, frequency, priority] + self + end + + def with_lock + lock! + begin + yield + ensure + unlock! + end + rescue Errno::EACCES => e + STDERR.puts "Lockfile exists" + end + + def table_name(model) + model.table_name + end + + def file_name(name) + name = table_name(name) unless name.is_a? String + "#{@file_path}/sitemap_#{name}" + end + + def document_root + end + def clean Dir["#{@file_path}/sitemap_*.{xml,xml.gz}"].each do |file| FileUtils.rm file end - return self + self end def generate + prepare_update + + generate_models + generate_static + generate_sitemap_index + self + end + + def generate_models for model, options in @sources - with_sitemap(Extlib::Inflection::tableize(model.to_s)) do |sitemap| + with_sitemap(model, options.dup) do |sitemap| + last_id = nil #id of last processed item count_method = pick_method(model, COUNT_METHODS) find_method = pick_method(model, FIND_METHODS) raise ArgumentError, "#{model} must provide a count_for_sitemap class method" if count_method.nil? raise ArgumentError, "#{model} must provide a find_for_sitemap class method" if find_method.nil? - count = model.send(count_method) + find_options = {} + [:conditions, :limit, :joins, :select, :order, :include, :group].each do |key| + find_options[key] = options.delete(key) + end + + primary_column = options.delete(:primary_column) + + count = model.send(count_method, find_options.merge(:select => (primary_column || '*'), :include => nil)) + count = find_options[:limit].to_i if find_options[:limit] && find_options[:limit].to_i < count num_sitemaps = 1 num_batches = 1 if count > @options[:batch_size] num_batches = (count.to_f / @options[:batch_size].to_f).ceil num_sitemaps = (count.to_f / @options[:max_per_sitemap].to_f).ceil end batches_per_sitemap = num_batches.to_f / num_sitemaps.to_f - find_options = options.except(:path, :num_items, :priority, :change_frequency, :last_modified) - for sitemap_num in 1..num_sitemaps # Work out the start and end batch numbers for this sitemap batch_num_start = sitemap_num == 1 ? 1 : ((sitemap_num * batches_per_sitemap).ceil - batches_per_sitemap + 1).to_i batch_num_end = (batch_num_start + [batches_per_sitemap, num_batches].min).floor - 1 for batch_num in batch_num_start..batch_num_end - offset = ((batch_num - 1) * @options[:batch_size]) - limit = (count - offset) < @options[:batch_size] ? (count - offset - 1) : @options[:batch_size] + offset = (batch_num - 1) * @options[:batch_size] + limit = (count - offset) < @options[:batch_size] ? (count - offset) : @options[:batch_size] find_options.update(:limit => limit, :offset => offset) if num_batches > 1 + if last_id && primary_column + find_options.update(:limit => limit, :offset => nil) + primary_column_value = last_id.to_s.gsub("'", %q(\\\')) #escape ' + find_options.update(:conditions => [find_options[:conditions], "(#{primary_column} > '#{primary_column_value}')"].compact.join(' AND ')) + end + model.send(find_method, find_options).each do |record| last_mod = options[:last_modified] if last_mod.is_a?(Proc) last_mod = last_mod.call(record) elsif last_mod.nil? @@ -120,29 +165,53 @@ last_mod = last_mod_method.nil? ? Time.now : record.send(last_mod_method) end param_method = pick_method(record, PARAM_METHODS) - location = defined?(Rails) ? polymorphic_url(record) : nil rescue nil - location ||= "#{root_url}/#{strip_leading_slash(options[:path])}/#{record.send(param_method)}" + location = options[:location] + if location.is_a?(Proc) + location = location.call(record) + else + location = "#{root_url}/#{strip_leading_slash(options[:path])}/#{record.send(param_method)}" + end change_frequency = options[:change_frequency] || 'weekly' freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency priority = options[:priority] pri = priority.is_a?(Proc) ? priority.call(record) : priority - sitemap.add_url!(location, last_mod, freq, pri) + last_id = primary_column ? record.send(primary_column) : nil + sitemap.add_url!(location, last_mod, freq, pri, last_id) end end end end end + self + end - generate_sitemap_index + def generate_static + return self if Array(@static_pages).empty? + with_sitemap('static', :type => 'static') do |sitemap| + @static_pages.each do |location, last_mod, freq, pri| + sitemap.add_url!(location, last_mod, freq, pri) + end + end + self + end - return self + # Create a sitemap index document + def generate_sitemap_index(files = nil) + files ||= Dir["#{@file_path}/sitemap_*.{xml,xml.gz}"] + with_sitemap 'index', :type => 'index' do |sitemap| + for path in files + next if path =~ /index/ + sitemap.add_url!(url_for_sitemap(path), File.stat(path).mtime) + end + end + self end def ping_search_engines require 'net/http' require 'cgi' @@ -184,21 +253,45 @@ end end private + def prepare_update + @files_to_move = [] + @sources.each do |model, options| + if options[:partial_update] && primary_column = options[:primary_column] && last_id = get_last_id(options[:filename]) + primary_column_value = last_id.to_s.gsub("'", %q(\\\')) #escape ' + options[:conditions] = [options[:conditions], "(#{primary_column} >= '#{primary_column_value}')"].compact.join(' AND ') + options[:start_part_id] = last_id + end + end + end + + def lock!(lock_file = 'generator.lock') + File.open("#{@file_path}/#{lock_file}", 'w', File::EXCL) + end + + def unlock!(lock_file = 'generator.lock') + FileUtils.rm "#{@file_path}/#{lock_file}" + end + def with_sitemap(name, options={}) - options[:index] = name == 'index' - options[:filename] = "#{@file_path}/sitemap_#{name}" - options[:max_urls] = @options[:max_per_sitemap] + options[:filename] ||= file_name(name) + options[:type] ||= 'sitemap' + options[:max_urls] ||= @options["max_per_#{options[:type]}".to_sym] + options[:gzip] ||= @options[:gzip] + options[:indent] = options[:gzip] ? 0 : 2 - unless options[:gzip] = @options[:gzip] - options[:indent] = 2 + sitemap = if options[:type] == 'index' + IndexBuilder.new(options) + elsif options[:geo] + options[:filename] << '_kml' + GeoBuilder.new(options) + else + Builder.new(options) end - sitemap = Builder.new(options) - begin yield sitemap ensure sitemap.close! @sitemap_files.concat sitemap.paths! @@ -207,10 +300,16 @@ def strip_leading_slash(str) str.sub(/^\//, '') end + def get_last_id(filename) + Dir["#{filename}*.{xml,xml.gz}"].map do |file| + file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i + end.sort.last + end + def pick_method(model, candidates) method = nil candidates.each do |candidate| if model.respond_to? candidate method = candidate @@ -219,21 +318,45 @@ end method end def url_for_sitemap(path) - if @options[:path].blank? - "#{root_url}/#{File.basename(path)}" - else - "#{root_url}/#{@options[:path]}/#{File.basename(path)}" - end + [root_url, @options[:path], File.basename(path)].compact.join('/') end - # Create a sitemap index document - def generate_sitemap_index - with_sitemap 'index' do |sitemap| - for path in @sitemap_files - sitemap.add_url!(url_for_sitemap(path), File.stat(path).mtime) - end - end +end + + + +class BigSitemapRails < BigSitemap + + include ActionController::UrlWriter if defined? Rails + + def initialize(options) + require 'action_controller' + + super options.merge(:default_url_options => default_url_options) end + + def document_root + "#{Rails.root}/public" + end +end + + + +class BigSitemapMerb < BigSitemap + + def initialize(options) + require 'extlib' + super + end + + def document_root + "#{Merb.root}/public" + end + + def table_name(model) + Extlib::Inflection.tableize(model.to_s) + end + end \ No newline at end of file