lib/big_sitemap.rb in big_sitemap-0.5.1 vs lib/big_sitemap.rb in big_sitemap-0.8.1
- old
+ new
@@ -1,9 +1,9 @@
require 'uri'
+require 'fileutils'
+
require 'big_sitemap/builder'
-require 'extlib'
-require 'action_controller' if defined? Rails
class BigSitemap
DEFAULTS = {
:max_per_sitemap => Builder::MAX_URLS,
:batch_size => 1001,
@@ -20,17 +20,14 @@
COUNT_METHODS = [:count_for_sitemap, :count]
FIND_METHODS = [:find_for_sitemap, :all]
TIMESTAMP_METHODS = [:updated_at, :updated_on, :updated, :created_at, :created_on, :created]
PARAM_METHODS = [:to_param, :id]
- include ActionController::UrlWriter if defined? Rails
-
def initialize(options)
@options = DEFAULTS.merge options
- # Use Rails' default_url_options if available
- @default_url_options = defined?(Rails) ? default_url_options : {}
+ @default_url_options = options.delete(:default_url_options) || {}
if @options[:max_per_sitemap] <= 1
raise ArgumentError, '":max_per_sitemap" must be greater than 1'
end
@@ -47,17 +44,11 @@
if @options[:batch_size] > @options[:max_per_sitemap]
raise ArgumentError, '":batch_size" must be less than ":max_per_sitemap"'
end
- @options[:document_root] ||= begin
- if defined? Rails
- "#{Rails.root}/public"
- elsif defined? Merb
- "#{Merb.root}/public"
- end
- end
+ @options[:document_root] ||= document_root
unless @options[:document_root]
raise ArgumentError, 'Document root must be specified with the ":document_root" option'
end
@@ -67,52 +58,106 @@
@sources = []
@sitemap_files = []
end
def add(model, options={})
- options[:path] ||= Extlib::Inflection.tableize(model.to_s)
+ options[:path] ||= table_name(model)
+ options[:filename] ||= file_name(model)
+ options[:primary_column] ||= 'id' if model.new.respond_to?('id')
+ options[:partial_update] = @options[:partial_update] && options[:partial_update] != false
@sources << [model, options.dup]
- return self
+ self
end
+ def add_static(url, time = nil, frequency = nil, priority = nil)
+ @static_pages ||= []
+ @static_pages << [url, time, frequency, priority]
+ self
+ end
+
+ def with_lock
+ lock!
+ begin
+ yield
+ ensure
+ unlock!
+ end
+ rescue Errno::EACCES => e
+ STDERR.puts "Lockfile exists"
+ end
+
+ # Returns the name used for +model+ when building its default :path and
+ # file name. This base implementation asks the model for its +table_name+.
+ def table_name(model)
+ model.table_name
+ end
+
+ def file_name(name)
+ name = table_name(name) unless name.is_a? String
+ "#{@file_path}/sitemap_#{name}"
+ end
+
+ # Fallback used by initialize when no :document_root option is given.
+ # This base implementation returns nil, in which case initialize raises
+ # ArgumentError.
+ def document_root
+ end
+
# Deletes every sitemap_*.xml and sitemap_*.xml.gz file under @file_path.
# Returns +self+ so calls can be chained.
def clean
Dir["#{@file_path}/sitemap_*.{xml,xml.gz}"].each do |file|
FileUtils.rm file
end
- return self
+ self
end
# Runs a full generation pass: prepare_update, then the model sitemaps, the
# static sitemap and finally the sitemap index. Returns +self+.
def generate
+ prepare_update
+
+ generate_models
+ generate_static
+ generate_sitemap_index
+ self
+ end
+
+ def generate_models
for model, options in @sources
- with_sitemap(Extlib::Inflection::tableize(model.to_s)) do |sitemap|
+ with_sitemap(model, options.dup) do |sitemap|
+ last_id = nil #id of last processed item
count_method = pick_method(model, COUNT_METHODS)
find_method = pick_method(model, FIND_METHODS)
raise ArgumentError, "#{model} must provide a count_for_sitemap class method" if count_method.nil?
raise ArgumentError, "#{model} must provide a find_for_sitemap class method" if find_method.nil?
- count = model.send(count_method)
+ find_options = {}
+ [:conditions, :limit, :joins, :select, :order, :include, :group].each do |key|
+ find_options[key] = options.delete(key)
+ end
+
+ primary_column = options.delete(:primary_column)
+
+ count = model.send(count_method, find_options.merge(:select => (primary_column || '*'), :include => nil))
+ count = find_options[:limit].to_i if find_options[:limit] && find_options[:limit].to_i < count
num_sitemaps = 1
num_batches = 1
if count > @options[:batch_size]
num_batches = (count.to_f / @options[:batch_size].to_f).ceil
num_sitemaps = (count.to_f / @options[:max_per_sitemap].to_f).ceil
end
batches_per_sitemap = num_batches.to_f / num_sitemaps.to_f
- find_options = options.except(:path, :num_items, :priority, :change_frequency, :last_modified)
-
for sitemap_num in 1..num_sitemaps
# Work out the start and end batch numbers for this sitemap
batch_num_start = sitemap_num == 1 ? 1 : ((sitemap_num * batches_per_sitemap).ceil - batches_per_sitemap + 1).to_i
batch_num_end = (batch_num_start + [batches_per_sitemap, num_batches].min).floor - 1
for batch_num in batch_num_start..batch_num_end
- offset = ((batch_num - 1) * @options[:batch_size])
- limit = (count - offset) < @options[:batch_size] ? (count - offset - 1) : @options[:batch_size]
+ offset = (batch_num - 1) * @options[:batch_size]
+ limit = (count - offset) < @options[:batch_size] ? (count - offset) : @options[:batch_size]
find_options.update(:limit => limit, :offset => offset) if num_batches > 1
+ if last_id && primary_column
+ find_options.update(:limit => limit, :offset => nil)
+ primary_column_value = last_id.to_s.gsub("'", %q(\\\')) #escape '
+ find_options.update(:conditions => [find_options[:conditions], "(#{primary_column} > '#{primary_column_value}')"].compact.join(' AND '))
+ end
+
model.send(find_method, find_options).each do |record|
last_mod = options[:last_modified]
if last_mod.is_a?(Proc)
last_mod = last_mod.call(record)
elsif last_mod.nil?
@@ -120,29 +165,53 @@
last_mod = last_mod_method.nil? ? Time.now : record.send(last_mod_method)
end
param_method = pick_method(record, PARAM_METHODS)
- location = defined?(Rails) ? polymorphic_url(record) : nil rescue nil
- location ||= "#{root_url}/#{strip_leading_slash(options[:path])}/#{record.send(param_method)}"
+ location = options[:location]
+ if location.is_a?(Proc)
+ location = location.call(record)
+ else
+ location = "#{root_url}/#{strip_leading_slash(options[:path])}/#{record.send(param_method)}"
+ end
change_frequency = options[:change_frequency] || 'weekly'
freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency
priority = options[:priority]
pri = priority.is_a?(Proc) ? priority.call(record) : priority
- sitemap.add_url!(location, last_mod, freq, pri)
+ last_id = primary_column ? record.send(primary_column) : nil
+ sitemap.add_url!(location, last_mod, freq, pri, last_id)
end
end
end
end
end
+ self
+ end
- generate_sitemap_index
+ def generate_static
+ return self if Array(@static_pages).empty?
+ with_sitemap('static', :type => 'static') do |sitemap|
+ @static_pages.each do |location, last_mod, freq, pri|
+ sitemap.add_url!(location, last_mod, freq, pri)
+ end
+ end
+ self
+ end
- return self
+ # Create a sitemap index document
+ def generate_sitemap_index(files = nil)
+ files ||= Dir["#{@file_path}/sitemap_*.{xml,xml.gz}"]
+ with_sitemap 'index', :type => 'index' do |sitemap|
+ for path in files
+ next if path =~ /index/
+ sitemap.add_url!(url_for_sitemap(path), File.stat(path).mtime)
+ end
+ end
+ self
end
def ping_search_engines
require 'net/http'
require 'cgi'
@@ -184,21 +253,45 @@
end
end
private
+ def prepare_update
+ @files_to_move = []
+ @sources.each do |model, options|
+ if options[:partial_update] && primary_column = options[:primary_column] && last_id = get_last_id(options[:filename])
+ primary_column_value = last_id.to_s.gsub("'", %q(\\\')) #escape '
+ options[:conditions] = [options[:conditions], "(#{primary_column} >= '#{primary_column_value}')"].compact.join(' AND ')
+ options[:start_part_id] = last_id
+ end
+ end
+ end
+
+ def lock!(lock_file = 'generator.lock')
+ File.open("#{@file_path}/#{lock_file}", 'w', File::EXCL)
+ end
+
+ # Removes the lock file (default name 'generator.lock') from @file_path.
+ def unlock!(lock_file = 'generator.lock')
+ FileUtils.rm "#{@file_path}/#{lock_file}"
+ end
+
def with_sitemap(name, options={})
- options[:index] = name == 'index'
- options[:filename] = "#{@file_path}/sitemap_#{name}"
- options[:max_urls] = @options[:max_per_sitemap]
+ options[:filename] ||= file_name(name)
+ options[:type] ||= 'sitemap'
+ options[:max_urls] ||= @options["max_per_#{options[:type]}".to_sym]
+ options[:gzip] ||= @options[:gzip]
+ options[:indent] = options[:gzip] ? 0 : 2
- unless options[:gzip] = @options[:gzip]
- options[:indent] = 2
+ sitemap = if options[:type] == 'index'
+ IndexBuilder.new(options)
+ elsif options[:geo]
+ options[:filename] << '_kml'
+ GeoBuilder.new(options)
+ else
+ Builder.new(options)
end
- sitemap = Builder.new(options)
-
begin
yield sitemap
ensure
sitemap.close!
@sitemap_files.concat sitemap.paths!
@@ -207,10 +300,16 @@
# Returns +str+ without its first character when that character is "/".
def strip_leading_slash(str)
  # \A anchors at the very start of the string; ^ would also match a slash
  # that follows a newline further inside the string.
  str.sub(/\A\//, '')
end
+ def get_last_id(filename)
+ Dir["#{filename}*.{xml,xml.gz}"].map do |file|
+ file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i
+ end.sort.last
+ end
+
def pick_method(model, candidates)
method = nil
candidates.each do |candidate|
if model.respond_to? candidate
method = candidate
@@ -219,21 +318,45 @@
end
method
end
def url_for_sitemap(path)
- if @options[:path].blank?
- "#{root_url}/#{File.basename(path)}"
- else
- "#{root_url}/#{@options[:path]}/#{File.basename(path)}"
- end
+ [root_url, @options[:path], File.basename(path)].compact.join('/')
end
- # Create a sitemap index document
- def generate_sitemap_index
- with_sitemap 'index' do |sitemap|
- for path in @sitemap_files
- sitemap.add_url!(url_for_sitemap(path), File.stat(path).mtime)
- end
- end
+end
+
+
+
+ # BigSitemap variant for Rails: passes default_url_options to the base class
+ # and uses "#{Rails.root}/public" as the document root.
+class BigSitemapRails < BigSitemap
+
+ # NOTE(review): this include runs when the class body is evaluated, i.e.
+ # before the `require 'action_controller'` in initialize; presumably
+ # ActionController is already loaded whenever Rails is defined. Confirm.
+ include ActionController::UrlWriter if defined? Rails
+
+ # Loads action_controller, then calls the base initializer with
+ # :default_url_options added to +options+.
+ # default_url_options is presumably provided by UrlWriter. TODO confirm.
+ def initialize(options)
+ require 'action_controller'
+
+ super options.merge(:default_url_options => default_url_options)
end
+
+ # Document root used when no :document_root option is given.
+ def document_root
+ "#{Rails.root}/public"
+ end
+end
+
+
+
+ # BigSitemap variant for Merb: uses "#{Merb.root}/public" as the document
+ # root and derives table names with Extlib's inflector.
+class BigSitemapMerb < BigSitemap
+
+ # Loads extlib (needed by table_name below), then calls the base
+ # initializer with the same arguments.
+ def initialize(options)
+ require 'extlib'
+ super
+ end
+
+ # Document root used when no :document_root option is given.
+ def document_root
+ "#{Merb.root}/public"
+ end
+
+ # Derives the name from the model's string form instead of asking the model
+ # for table_name.
+ def table_name(model)
+ Extlib::Inflection.tableize(model.to_s)
+ end
+
end
\ No newline at end of file