lib/big_sitemap.rb in big_sitemap-0.8.3 vs lib/big_sitemap.rb in big_sitemap-1.0.0

- old
+ new

@@ -4,45 +4,71 @@ require 'big_sitemap/builder' class BigSitemap DEFAULTS = { :max_per_sitemap => Builder::MAX_URLS, - :batch_size => 1001, - :document_path => 'sitemaps/', + :batch_size => 1001, # TODO: Deprecate + :document_path => '/', :gzip => true, - # opinionated + # Opinionated :ping_google => true, :ping_yahoo => false, # needs :yahoo_app_id :ping_bing => false, :ping_ask => false } + # TODO: Deprecate COUNT_METHODS = [:count_for_sitemap, :count] FIND_METHODS = [:find_for_sitemap, :all] TIMESTAMP_METHODS = [:updated_at, :updated_on, :updated, :created_at, :created_on, :created] PARAM_METHODS = [:to_param, :id] + class << self + def generate(options={}, &block) + @sitemap = self.new(options) + + @sitemap.first_id_of_last_sitemap = first_id_of_last_sitemap + + instance_eval(&block) + + @sitemap.with_lock do + @sitemap.generate(options) + end + end + + private + + def first_id_of_last_sitemap + Dir["#{@sitemap.document_full}sitemap*.{xml,xml.gz}"].map do |file| + file.to_s.scan(/sitemap_(.+).xml/).flatten.last.to_i + end.sort.last + end + + def add(path, options={}) + @sitemap.add_path(path, options) + end + end + def initialize(options={}) @options = DEFAULTS.merge options - @options[:document_path] ||= @options[:path] #for legacy reasons if @options[:max_per_sitemap] <= 1 raise ArgumentError, '":max_per_sitemap" must be greater than 1' end - if @options[:url_options] + if @options[:url_options] && !@options[:base_url] @options[:base_url] = URI::Generic.build( {:scheme => "http"}.merge(@options.delete(:url_options)) ).to_s end unless @options[:base_url] raise ArgumentError, 'you must specify either ":url_options" hash or ":base_url" string' end @options[:url_path] ||= @options[:document_path] - if @options[:batch_size] > @options[:max_per_sitemap] - raise ArgumentError, '":batch_size" must be less than ":max_per_sitemap"' + unless @options[:document_root] + raise ArgumentError, 'Document root must be specified with the ":document_root" option"' end @options[:document_full] ||= File.join(@options[:document_root], @options[:document_path]) unless @options[:document_full] raise ArgumentError, 'Document root must be specified with the ":document_root" option, the full path with ":document_full"' @@ -53,11 +79,24 @@ @sources = [] @models = [] @sitemap_files = [] end + def first_id_of_last_sitemap + @first_id_of_last_sitemap + end + + def first_id_of_last_sitemap=(first_id) + @first_id_of_last_sitemap = first_id + end + + def document_full + @options[:document_full] + end + def add(model, options={}) + warn 'BigSitemap#add is deprecated. Please use BigSitemap.generate and call add inside the block (in BigSitemap 1.0.0+). You will have to perform the find and generate the path for each record yourself.' @models << model filename_suffix = @models.count(model) - 1 options[:path] ||= table_name(model) @@ -70,11 +109,18 @@ @sources << [model, options.dup] self end + def add_path(path, options) + @paths ||= [] + @paths << [path, options] + self + end + def add_static(url, time = nil, frequency = nil, priority = nil) + warn 'BigSitemap#add_static is deprecated. Please use BigSitemap#add_path instead' @static_pages ||= [] @static_pages << [url, time, frequency, priority] self end @@ -87,42 +133,124 @@ end rescue Errno::EACCES => e STDERR.puts 'Lockfile exists' if $VERBOSE end - def table_name(model) - model.table_name + def file_name(name=nil) + name = table_name(name) unless (name.nil? || name.is_a?(String)) + prefix = 'sitemap' + prefix << '_' unless name.nil? + File.join(@options[:document_full], "#{prefix}#{name}") end - def file_name(name) - name = table_name(name) unless name.is_a? String - File.join(@options[:document_full], "sitemap_#{name}") - end - def dir_files - File.join(@options[:document_full], "sitemap_*.{xml,xml.gz}") + File.join(@options[:document_full], "sitemap*.{xml,xml.gz}") end def clean Dir[dir_files].each do |file| FileUtils.rm file end + self end - def generate + # TODO: Deprecate (move to private) + def generate(options={}) + clean unless options[:partial_update] + + # TODO: Ddeprecate prepare_update + add_urls + + # TODO: Deprecate generate_models generate_static + generate_sitemap_index + + ping_search_engines + self end + def add_urls + return self if Array(@paths).empty? + + with_sitemap do |builder| + @paths.each do |path, options| + url = File.join @options[:base_url], File.basename(path) + builder.add_url! url, options + end + end + + self + end + + # Create a sitemap index document + def generate_sitemap_index(files=nil) + files ||= Dir[dir_files] + + with_sitemap({:name => 'index', :type => 'index'}) do |sitemap| + for path in files + next if path =~ /index/ + sitemap.add_url! url_for_sitemap(path), :last_modified => File.stat(path).mtime + end + end + + self + end + + def ping_search_engines + require 'net/http' + require 'cgi' + + sitemap_uri = CGI::escape(url_for_sitemap(@sitemap_files.last)) + + if @options[:ping_google] + Net::HTTP.get('www.google.com', "/webmasters/tools/ping?sitemap=#{sitemap_uri}") + end + + if @options[:ping_yahoo] + if @options[:yahoo_app_id] + Net::HTTP.get( + 'search.yahooapis.com', "/SiteExplorerService/V1/updateNotification?" + + "appid=#{@options[:yahoo_app_id]}&url=#{sitemap_uri}" + ) + else + STDERR.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided' + end + end + + if @options[:ping_bing] + Net::HTTP.get('www.bing.com', "/webmaster/ping.aspx?siteMap=#{sitemap_uri}") + end + + if @options[:ping_ask] + Net::HTTP.get('submissions.ask.com', "/ping?sitemap=#{sitemap_uri}") + end + end + + # TODO: Deprecate + def get_last_id(filename) + Dir["#{filename}*.{xml,xml.gz}"].map do |file| + file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i + end.sort.last + end + + private + + # TODO: Deprecate + def table_name(model) + model.table_name + end + + # TODO: Deprecate def generate_models for model, options in @sources - with_sitemap(model, options.dup) do |sitemap| + with_sitemap(options.dup.merge({:name => model})) do |sitemap| last_id = nil #id of last processed item count_method = pick_method(model, COUNT_METHODS) find_method = pick_method(model, FIND_METHODS) raise ArgumentError, "#{model} must provide a count_for_sitemap class method" if count_method.nil? raise ArgumentError, "#{model} must provide a find_for_sitemap class method" if find_method.nil? @@ -130,11 +258,12 @@ find_options = {} [:conditions, :limit, :joins, :select, :order, :include, :group].each do |key| find_options[key] = options.delete(key) end - primary_column = options.delete(:primary_column) + primary_method = options.delete(:primary_column) + primary_column = "#{table_name(model)}.#{primary_method}" count = model.send(count_method, find_options.merge(:select => (primary_column || '*'), :include => nil)) count = find_options[:limit].to_i if find_options[:limit] && find_options[:limit].to_i < count num_sitemaps = 1 num_batches = 1 @@ -177,86 +306,54 @@ options[:location].call(record) else File.join @options[:base_url], options[:path], record.send(param_method).to_s end - change_frequency = options[:change_frequency] || 'weekly' + change_frequency = options[:change_frequency] freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency priority = options[:priority] pri = priority.is_a?(Proc) ? priority.call(record) : priority - last_id = primary_column ? record.send(primary_column) : nil - sitemap.add_url!(location, last_mod, freq, pri, last_id) + last_id = primary_column ? record.send(primary_method) : nil + + sitemap.add_url!(location, { + :last_modified => last_mod, + :change_frequency => freq, + :priority => pri, + :part_number => last_id + }) if location end end end end end self end + # TODO: Deprecate def generate_static return self if Array(@static_pages).empty? - with_sitemap('static', :type => 'static') do |sitemap| + with_sitemap({:name => 'static', :type => 'static'}) do |sitemap| @static_pages.each do |location, last_mod, freq, pri| - sitemap.add_url!(location, last_mod, freq, pri) + sitemap.add_url!(location, { + :last_modified => last_mod, + :change_frequency => freq, + :priority => pri + }) end end self end - # Create a sitemap index document - def generate_sitemap_index(files = nil) - files ||= Dir[dir_files] - with_sitemap 'index', :type => 'index' do |sitemap| - for path in files - next if path =~ /index/ - sitemap.add_url!(url_for_sitemap(path), File.stat(path).mtime) - end - end - self - end - - def ping_search_engines - require 'net/http' - require 'cgi' - - sitemap_uri = CGI::escape(url_for_sitemap(@sitemap_files.last)) - - if @options[:ping_google] - Net::HTTP.get('www.google.com', "/webmasters/tools/ping?sitemap=#{sitemap_uri}") - end - - if @options[:ping_yahoo] - if @options[:yahoo_app_id] - Net::HTTP.get( - 'search.yahooapis.com', "/SiteExplorerService/V1/updateNotification?" + - "appid=#{@options[:yahoo_app_id]}&url=#{sitemap_uri}" - ) - else - STDERR.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided' - end - end - - if @options[:ping_bing] - Net::HTTP.get('www.bing.com', "/webmaster/ping.aspx?siteMap=#{sitemap_uri}") - end - - if @options[:ping_ask] - Net::HTTP.get('submissions.ask.com', "/ping?sitemap=#{sitemap_uri}") - end - end - - private - + # TODO: Deprecate def prepare_update @files_to_move = [] @sources.each do |model, options| if options[:partial_update] && (primary_column = options[:primary_column]) && (last_id = get_last_id(options[:filename])) primary_column_value = escape_if_string last_id #escape ' - options[:conditions] = [options[:conditions], "(#{primary_column} >= #{primary_column_value})"].compact.join(' AND ') + options[:conditions] = [options[:conditions], "(#{table_name(model)}.#{primary_column} >= #{primary_column_value})"].compact.join(' AND ') options[:start_part_id] = last_id end end end @@ -268,16 +365,18 @@ def unlock!(lock_file = 'generator.lock') lock_file = File.join(@options[:document_full], lock_file) FileUtils.rm lock_file end - def with_sitemap(name, options={}) - options[:filename] ||= file_name(name) - options[:type] ||= 'sitemap' - options[:max_urls] ||= @options["max_per_#{options[:type]}".to_sym] - options[:gzip] ||= @options[:gzip] - options[:indent] = options[:gzip] ? 0 : 2 + def with_sitemap(options={}) + options[:filename] ||= file_name(options[:name]) + options[:type] ||= 'sitemap' + options[:max_urls] ||= @options["max_per_#{options[:type]}".to_sym] + options[:gzip] ||= @options[:gzip] + options[:indent] ||= 2 + options[:partial_update] ||= @options[:partial_update] + options[:start_part_id] ||= first_id_of_last_sitemap sitemap = if options[:type] == 'index' IndexBuilder.new(options) elsif options[:geo] options[:filename] << '_kml' @@ -288,20 +387,14 @@ begin yield sitemap ensure sitemap.close! - @sitemap_files.concat sitemap.paths! + @sitemap_files.concat sitemap.filepaths! end end - def get_last_id(filename) - Dir["#{filename}*.{xml,xml.gz}"].map do |file| - file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i - end.sort.last - end - def pick_method(model, candidates) method = nil candidates.each do |candidate| if model.respond_to? candidate method = candidate @@ -309,10 +402,11 @@ end end method end + # TODO: Deprecate def escape_if_string(value) (value.to_i.to_s == value.to_s) ? value.to_i : "'#{value.gsub("'", %q(\\\'))}'" end def url_for_sitemap(path) @@ -321,40 +415,23 @@ end class BigSitemapRails < BigSitemap + def self.generate(options={}, &block) + raise 'No Rails Environment loaded' unless defined? Rails - if defined?(Rails) && Rails.version < "3" - include ActionController::UrlWriter - end - - def initialize(options={}) - raise "No Rails Environment loaded" unless defined? Rails - require 'action_controller' - - if Rails.version >= "3" - self.class.send(:include, Rails.application.routes.url_helpers) - end - DEFAULTS.merge!(:document_root => "#{Rails.root}/public", :url_options => default_url_options) - super(options) + super(options, &block) end - end class BigSitemapMerb < BigSitemap - - def initialize(options={}) - raise "No Merb Environment loaded" unless defined? Merb + def self.generate(options={}, &block) + raise 'No Merb Environment loaded' unless defined? ::Merb require 'extlib' DEFAULTS.merge!(:document_root => "#{Merb.root}/public") - super(options) + super(options, &block) end - - def table_name(model) - Extlib::Inflection.tableize(model.to_s) - end - end