lib/big_sitemap.rb in big_sitemap-0.8.3 vs lib/big_sitemap.rb in big_sitemap-1.0.0
- old
+ new
@@ -4,45 +4,71 @@
require 'big_sitemap/builder'
class BigSitemap
DEFAULTS = {
:max_per_sitemap => Builder::MAX_URLS,
- :batch_size => 1001,
- :document_path => 'sitemaps/',
+ :batch_size => 1001, # TODO: Deprecate
+ :document_path => '/',
:gzip => true,
- # opinionated
+ # Opinionated
:ping_google => true,
:ping_yahoo => false, # needs :yahoo_app_id
:ping_bing => false,
:ping_ask => false
}
+ # TODO: Deprecate
COUNT_METHODS = [:count_for_sitemap, :count]
FIND_METHODS = [:find_for_sitemap, :all]
TIMESTAMP_METHODS = [:updated_at, :updated_on, :updated, :created_at, :created_on, :created]
PARAM_METHODS = [:to_param, :id]
+ class << self
+ def generate(options={}, &block)
+ @sitemap = self.new(options)
+
+ @sitemap.first_id_of_last_sitemap = first_id_of_last_sitemap
+
+ instance_eval(&block)
+
+ @sitemap.with_lock do
+ @sitemap.generate(options)
+ end
+ end
+
+ private
+
+ def first_id_of_last_sitemap
+ Dir["#{@sitemap.document_full}sitemap*.{xml,xml.gz}"].map do |file|
+ file.to_s.scan(/sitemap_(.+).xml/).flatten.last.to_i
+ end.sort.last
+ end
+
+ def add(path, options={})
+ @sitemap.add_path(path, options)
+ end
+ end
+
def initialize(options={})
@options = DEFAULTS.merge options
- @options[:document_path] ||= @options[:path] #for legacy reasons
if @options[:max_per_sitemap] <= 1
raise ArgumentError, '":max_per_sitemap" must be greater than 1'
end
- if @options[:url_options]
+ if @options[:url_options] && !@options[:base_url]
@options[:base_url] = URI::Generic.build( {:scheme => "http"}.merge(@options.delete(:url_options)) ).to_s
end
unless @options[:base_url]
raise ArgumentError, 'you must specify either ":url_options" hash or ":base_url" string'
end
@options[:url_path] ||= @options[:document_path]
- if @options[:batch_size] > @options[:max_per_sitemap]
- raise ArgumentError, '":batch_size" must be less than ":max_per_sitemap"'
+ unless @options[:document_root]
+ raise ArgumentError, 'Document root must be specified with the ":document_root" option"'
end
@options[:document_full] ||= File.join(@options[:document_root], @options[:document_path])
unless @options[:document_full]
raise ArgumentError, 'Document root must be specified with the ":document_root" option, the full path with ":document_full"'
@@ -53,11 +79,24 @@
@sources = []
@models = []
@sitemap_files = []
end
+ def first_id_of_last_sitemap
+ @first_id_of_last_sitemap
+ end
+
+ def first_id_of_last_sitemap=(first_id)
+ @first_id_of_last_sitemap = first_id
+ end
+
+ def document_full
+ @options[:document_full]
+ end
+
def add(model, options={})
+ warn 'BigSitemap#add is deprecated. Please use BigSitemap.generate and call add inside the block (in BigSitemap 1.0.0+). You will have to perform the find and generate the path for each record yourself.'
@models << model
filename_suffix = @models.count(model) - 1
options[:path] ||= table_name(model)
@@ -70,11 +109,18 @@
@sources << [model, options.dup]
self
end
+ def add_path(path, options)
+ @paths ||= []
+ @paths << [path, options]
+ self
+ end
+
def add_static(url, time = nil, frequency = nil, priority = nil)
+ warn 'BigSitemap#add_static is deprecated. Please use BigSitemap#add_path instead'
@static_pages ||= []
@static_pages << [url, time, frequency, priority]
self
end
@@ -87,42 +133,124 @@
end
rescue Errno::EACCES => e
STDERR.puts 'Lockfile exists' if $VERBOSE
end
- def table_name(model)
- model.table_name
+ def file_name(name=nil)
+ name = table_name(name) unless (name.nil? || name.is_a?(String))
+ prefix = 'sitemap'
+ prefix << '_' unless name.nil?
+ File.join(@options[:document_full], "#{prefix}#{name}")
end
- def file_name(name)
- name = table_name(name) unless name.is_a? String
- File.join(@options[:document_full], "sitemap_#{name}")
- end
-
def dir_files
- File.join(@options[:document_full], "sitemap_*.{xml,xml.gz}")
+ File.join(@options[:document_full], "sitemap*.{xml,xml.gz}")
end
def clean
Dir[dir_files].each do |file|
FileUtils.rm file
end
+
self
end
- def generate
+ # TODO: Deprecate (move to private)
+ def generate(options={})
+ clean unless options[:partial_update]
+
+ # TODO: Ddeprecate
prepare_update
+ add_urls
+
+ # TODO: Deprecate
generate_models
generate_static
+
generate_sitemap_index
+
+ ping_search_engines
+
self
end
+ def add_urls
+ return self if Array(@paths).empty?
+
+ with_sitemap do |builder|
+ @paths.each do |path, options|
+ url = File.join @options[:base_url], File.basename(path)
+ builder.add_url! url, options
+ end
+ end
+
+ self
+ end
+
+ # Create a sitemap index document
+ def generate_sitemap_index(files=nil)
+ files ||= Dir[dir_files]
+
+ with_sitemap({:name => 'index', :type => 'index'}) do |sitemap|
+ for path in files
+ next if path =~ /index/
+ sitemap.add_url! url_for_sitemap(path), :last_modified => File.stat(path).mtime
+ end
+ end
+
+ self
+ end
+
+ def ping_search_engines
+ require 'net/http'
+ require 'cgi'
+
+ sitemap_uri = CGI::escape(url_for_sitemap(@sitemap_files.last))
+
+ if @options[:ping_google]
+ Net::HTTP.get('www.google.com', "/webmasters/tools/ping?sitemap=#{sitemap_uri}")
+ end
+
+ if @options[:ping_yahoo]
+ if @options[:yahoo_app_id]
+ Net::HTTP.get(
+ 'search.yahooapis.com', "/SiteExplorerService/V1/updateNotification?" +
+ "appid=#{@options[:yahoo_app_id]}&url=#{sitemap_uri}"
+ )
+ else
+ STDERR.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided'
+ end
+ end
+
+ if @options[:ping_bing]
+ Net::HTTP.get('www.bing.com', "/webmaster/ping.aspx?siteMap=#{sitemap_uri}")
+ end
+
+ if @options[:ping_ask]
+ Net::HTTP.get('submissions.ask.com', "/ping?sitemap=#{sitemap_uri}")
+ end
+ end
+
+ # TODO: Deprecate
+ def get_last_id(filename)
+ Dir["#{filename}*.{xml,xml.gz}"].map do |file|
+ file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i
+ end.sort.last
+ end
+
+ private
+
+ # TODO: Deprecate
+ def table_name(model)
+ model.table_name
+ end
+
+ # TODO: Deprecate
def generate_models
for model, options in @sources
- with_sitemap(model, options.dup) do |sitemap|
+ with_sitemap(options.dup.merge({:name => model})) do |sitemap|
last_id = nil #id of last processed item
count_method = pick_method(model, COUNT_METHODS)
find_method = pick_method(model, FIND_METHODS)
raise ArgumentError, "#{model} must provide a count_for_sitemap class method" if count_method.nil?
raise ArgumentError, "#{model} must provide a find_for_sitemap class method" if find_method.nil?
@@ -130,11 +258,12 @@
find_options = {}
[:conditions, :limit, :joins, :select, :order, :include, :group].each do |key|
find_options[key] = options.delete(key)
end
- primary_column = options.delete(:primary_column)
+ primary_method = options.delete(:primary_column)
+ primary_column = "#{table_name(model)}.#{primary_method}"
count = model.send(count_method, find_options.merge(:select => (primary_column || '*'), :include => nil))
count = find_options[:limit].to_i if find_options[:limit] && find_options[:limit].to_i < count
num_sitemaps = 1
num_batches = 1
@@ -177,86 +306,54 @@
options[:location].call(record)
else
File.join @options[:base_url], options[:path], record.send(param_method).to_s
end
- change_frequency = options[:change_frequency] || 'weekly'
+ change_frequency = options[:change_frequency]
freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency
priority = options[:priority]
pri = priority.is_a?(Proc) ? priority.call(record) : priority
- last_id = primary_column ? record.send(primary_column) : nil
- sitemap.add_url!(location, last_mod, freq, pri, last_id)
+ last_id = primary_column ? record.send(primary_method) : nil
+
+ sitemap.add_url!(location, {
+ :last_modified => last_mod,
+ :change_frequency => freq,
+ :priority => pri,
+ :part_number => last_id
+ }) if location
end
end
end
end
end
self
end
+ # TODO: Deprecate
def generate_static
return self if Array(@static_pages).empty?
- with_sitemap('static', :type => 'static') do |sitemap|
+ with_sitemap({:name => 'static', :type => 'static'}) do |sitemap|
@static_pages.each do |location, last_mod, freq, pri|
- sitemap.add_url!(location, last_mod, freq, pri)
+ sitemap.add_url!(location, {
+ :last_modified => last_mod,
+ :change_frequency => freq,
+ :priority => pri
+ })
end
end
self
end
- # Create a sitemap index document
- def generate_sitemap_index(files = nil)
- files ||= Dir[dir_files]
- with_sitemap 'index', :type => 'index' do |sitemap|
- for path in files
- next if path =~ /index/
- sitemap.add_url!(url_for_sitemap(path), File.stat(path).mtime)
- end
- end
- self
- end
-
- def ping_search_engines
- require 'net/http'
- require 'cgi'
-
- sitemap_uri = CGI::escape(url_for_sitemap(@sitemap_files.last))
-
- if @options[:ping_google]
- Net::HTTP.get('www.google.com', "/webmasters/tools/ping?sitemap=#{sitemap_uri}")
- end
-
- if @options[:ping_yahoo]
- if @options[:yahoo_app_id]
- Net::HTTP.get(
- 'search.yahooapis.com', "/SiteExplorerService/V1/updateNotification?" +
- "appid=#{@options[:yahoo_app_id]}&url=#{sitemap_uri}"
- )
- else
- STDERR.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided'
- end
- end
-
- if @options[:ping_bing]
- Net::HTTP.get('www.bing.com', "/webmaster/ping.aspx?siteMap=#{sitemap_uri}")
- end
-
- if @options[:ping_ask]
- Net::HTTP.get('submissions.ask.com', "/ping?sitemap=#{sitemap_uri}")
- end
- end
-
- private
-
+ # TODO: Deprecate
def prepare_update
@files_to_move = []
@sources.each do |model, options|
if options[:partial_update] && (primary_column = options[:primary_column]) && (last_id = get_last_id(options[:filename]))
primary_column_value = escape_if_string last_id #escape '
- options[:conditions] = [options[:conditions], "(#{primary_column} >= #{primary_column_value})"].compact.join(' AND ')
+ options[:conditions] = [options[:conditions], "(#{table_name(model)}.#{primary_column} >= #{primary_column_value})"].compact.join(' AND ')
options[:start_part_id] = last_id
end
end
end
@@ -268,16 +365,18 @@
def unlock!(lock_file = 'generator.lock')
lock_file = File.join(@options[:document_full], lock_file)
FileUtils.rm lock_file
end
- def with_sitemap(name, options={})
- options[:filename] ||= file_name(name)
- options[:type] ||= 'sitemap'
- options[:max_urls] ||= @options["max_per_#{options[:type]}".to_sym]
- options[:gzip] ||= @options[:gzip]
- options[:indent] = options[:gzip] ? 0 : 2
+ def with_sitemap(options={})
+ options[:filename] ||= file_name(options[:name])
+ options[:type] ||= 'sitemap'
+ options[:max_urls] ||= @options["max_per_#{options[:type]}".to_sym]
+ options[:gzip] ||= @options[:gzip]
+ options[:indent] ||= 2
+ options[:partial_update] ||= @options[:partial_update]
+ options[:start_part_id] ||= first_id_of_last_sitemap
sitemap = if options[:type] == 'index'
IndexBuilder.new(options)
elsif options[:geo]
options[:filename] << '_kml'
@@ -288,20 +387,14 @@
begin
yield sitemap
ensure
sitemap.close!
- @sitemap_files.concat sitemap.paths!
+ @sitemap_files.concat sitemap.filepaths!
end
end
- def get_last_id(filename)
- Dir["#{filename}*.{xml,xml.gz}"].map do |file|
- file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i
- end.sort.last
- end
-
def pick_method(model, candidates)
method = nil
candidates.each do |candidate|
if model.respond_to? candidate
method = candidate
@@ -309,10 +402,11 @@
end
end
method
end
+ # TODO: Deprecate
def escape_if_string(value)
(value.to_i.to_s == value.to_s) ? value.to_i : "'#{value.gsub("'", %q(\\\'))}'"
end
def url_for_sitemap(path)
@@ -321,40 +415,23 @@
end
class BigSitemapRails < BigSitemap
+ def self.generate(options={}, &block)
+ raise 'No Rails Environment loaded' unless defined? Rails
- if defined?(Rails) && Rails.version < "3"
- include ActionController::UrlWriter
- end
-
- def initialize(options={})
- raise "No Rails Environment loaded" unless defined? Rails
- require 'action_controller'
-
- if Rails.version >= "3"
- self.class.send(:include, Rails.application.routes.url_helpers)
- end
-
DEFAULTS.merge!(:document_root => "#{Rails.root}/public", :url_options => default_url_options)
- super(options)
+ super(options, &block)
end
-
end
class BigSitemapMerb < BigSitemap
-
- def initialize(options={})
- raise "No Merb Environment loaded" unless defined? Merb
+ def self.generate(options={}, &block)
+ raise 'No Merb Environment loaded' unless defined? ::Merb
require 'extlib'
DEFAULTS.merge!(:document_root => "#{Merb.root}/public")
- super(options)
+ super(options, &block)
end
-
- def table_name(model)
- Extlib::Inflection.tableize(model.to_s)
- end
-
end