lib/big_sitemap.rb in big_sitemap-0.8.2 vs lib/big_sitemap.rb in big_sitemap-0.8.3

- old
+ new

@@ -5,11 +5,11 @@ class BigSitemap DEFAULTS = { :max_per_sitemap => Builder::MAX_URLS, :batch_size => 1001, - :path => 'sitemaps', + :document_path => 'sitemaps/', :gzip => true, # opinionated :ping_google => true, :ping_yahoo => false, # needs :yahoo_app_id @@ -22,40 +22,35 @@ TIMESTAMP_METHODS = [:updated_at, :updated_on, :updated, :created_at, :created_on, :created] PARAM_METHODS = [:to_param, :id] def initialize(options={}) @options = DEFAULTS.merge options + @options[:document_path] ||= @options[:path] #for legacy reasons - @default_url_options = options.delete(:default_url_options) || {} - if @options[:max_per_sitemap] <= 1 raise ArgumentError, '":max_per_sitemap" must be greater than 1' end if @options[:url_options] - @default_url_options.update @options[:url_options] - elsif @options[:base_url] - uri = URI.parse(@options[:base_url]) - @default_url_options[:host] = uri.host - @default_url_options[:port] = uri.port - @default_url_options[:protocol] = uri.scheme - else + @options[:base_url] = URI::Generic.build( {:scheme => "http"}.merge(@options.delete(:url_options)) ).to_s + end + + unless @options[:base_url] raise ArgumentError, 'you must specify either ":url_options" hash or ":base_url" string' end + @options[:url_path] ||= @options[:document_path] if @options[:batch_size] > @options[:max_per_sitemap] raise ArgumentError, '":batch_size" must be less than ":max_per_sitemap"' end - @options[:document_root] ||= document_root - - unless @options[:document_root] - raise ArgumentError, 'Document root must be specified with the ":document_root" option' + @options[:document_full] ||= File.join(@options[:document_root], @options[:document_path]) + unless @options[:document_full] + raise ArgumentError, 'Document root must be specified with the ":document_root" option, the full path with ":document_full"' end - @file_path = "#{@options[:document_root]}/#{strip_leading_slash(@options[:path])}" - Dir.mkdir(@file_path) unless File.exists? @file_path + Dir.mkdir(@options[:document_full]) unless File.exists?(@options[:document_full]) @sources = [] @models = [] @sitemap_files = [] end @@ -98,18 +93,19 @@ model.table_name end def file_name(name) name = table_name(name) unless name.is_a? String - "#{@file_path}/sitemap_#{name}" + File.join(@options[:document_full], "sitemap_#{name}") end - def document_root + def dir_files + File.join(@options[:document_full], "sitemap_*.{xml,xml.gz}") end def clean - Dir["#{@file_path}/sitemap_*.{xml,xml.gz}"].each do |file| + Dir[dir_files].each do |file| FileUtils.rm file end self end @@ -159,12 +155,12 @@ limit = (count - offset) < @options[:batch_size] ? (count - offset) : @options[:batch_size] find_options.update(:limit => limit, :offset => offset) if num_batches > 1 if last_id && primary_column find_options.update(:limit => limit, :offset => nil) - primary_column_value = last_id.to_s.gsub("'", %q(\\\')) #escape ' - find_options.update(:conditions => [find_options[:conditions], "(#{primary_column} > '#{primary_column_value}')"].compact.join(' AND ')) + primary_column_value = escape_if_string last_id #escape ' + find_options.update(:conditions => [find_options[:conditions], "(#{primary_column} > #{primary_column_value})"].compact.join(' AND ')) end model.send(find_method, find_options).each do |record| last_mod = options[:last_modified] if last_mod.is_a?(Proc) @@ -174,16 +170,16 @@ last_mod = last_mod_method.nil? ? Time.now : record.send(last_mod_method) end param_method = pick_method(record, PARAM_METHODS) - location = options[:location] - if location.is_a?(Proc) - location = location.call(record) - else - location = "#{root_url}/#{strip_leading_slash(options[:path])}/#{record.send(param_method)}" - end + location = + if options[:location].is_a?(Proc) + options[:location].call(record) + else + File.join @options[:base_url], options[:path], record.send(param_method).to_s + end change_frequency = options[:change_frequency] || 'weekly' freq = change_frequency.is_a?(Proc) ? change_frequency.call(record) : change_frequency priority = options[:priority] @@ -209,11 +205,11 @@ self end # Create a sitemap index document def generate_sitemap_index(files = nil) - files ||= Dir["#{@file_path}/sitemap_*.{xml,xml.gz}"] + files ||= Dir[dir_files] with_sitemap 'index', :type => 'index' do |sitemap| for path in files next if path =~ /index/ sitemap.add_url!(url_for_sitemap(path), File.stat(path).mtime) end @@ -236,11 +232,11 @@ Net::HTTP.get( 'search.yahooapis.com', "/SiteExplorerService/V1/updateNotification?" + "appid=#{@options[:yahoo_app_id]}&url=#{sitemap_uri}" ) else - $stderr.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided' + STDERR.puts 'unable to ping Yahoo: no ":yahoo_app_id" provided' end end if @options[:ping_bing] Net::HTTP.get('www.bing.com', "/webmaster/ping.aspx?siteMap=#{sitemap_uri}") @@ -249,40 +245,31 @@ if @options[:ping_ask] Net::HTTP.get('submissions.ask.com', "/ping?sitemap=#{sitemap_uri}") end end - def root_url - @root_url ||= begin - url = '' - url << (@default_url_options[:protocol] || 'http') - url << '://' unless url.match('://') - url << @default_url_options[:host] - url << ":#{port}" if port = @default_url_options[:port] and port != 80 - url - end - end - private def prepare_update @files_to_move = [] @sources.each do |model, options| - if options[:partial_update] && primary_column = options[:primary_column] && last_id = get_last_id(options[:filename]) - primary_column_value = last_id.to_s.gsub("'", %q(\\\')) #escape ' - options[:conditions] = [options[:conditions], "(#{primary_column} >= '#{primary_column_value}')"].compact.join(' AND ') + if options[:partial_update] && (primary_column = options[:primary_column]) && (last_id = get_last_id(options[:filename])) + primary_column_value = escape_if_string last_id #escape ' + options[:conditions] = [options[:conditions], "(#{primary_column} >= #{primary_column_value})"].compact.join(' AND ') options[:start_part_id] = last_id end end end def lock!(lock_file = 'generator.lock') - File.open("#{@file_path}/#{lock_file}", 'w', File::EXCL) + lock_file = File.join(@options[:document_full], lock_file) + File.open(lock_file, 'w', File::EXCL) end def unlock!(lock_file = 'generator.lock') - FileUtils.rm "#{@file_path}/#{lock_file}" + lock_file = File.join(@options[:document_full], lock_file) + FileUtils.rm lock_file end def with_sitemap(name, options={}) options[:filename] ||= file_name(name) options[:type] ||= 'sitemap' @@ -305,14 +292,10 @@ sitemap.close! @sitemap_files.concat sitemap.paths! end end - def strip_leading_slash(str) - str.sub(/^\//, '') - end - def get_last_id(filename) Dir["#{filename}*.{xml,xml.gz}"].map do |file| file.to_s.scan(/#{filename}_(.+).xml/).flatten.last.to_i end.sort.last end @@ -326,46 +309,52 @@ end end method end + def escape_if_string(value) + (value.to_i.to_s == value.to_s) ? value.to_i : "'#{value.gsub("'", %q(\\\'))}'" + end + def url_for_sitemap(path) - [root_url, @options[:path], File.basename(path)].compact.join('/') + File.join @options[:base_url], @options[:url_path], File.basename(path) end end - class BigSitemapRails < BigSitemap - include ActionController::UrlWriter if defined? Rails + if defined?(Rails) && Rails.version < "3" + include ActionController::UrlWriter + end def initialize(options={}) + raise "No Rails Environment loaded" unless defined? Rails require 'action_controller' - super options.merge(:default_url_options => default_url_options) - end + if Rails.version >= "3" + self.class.send(:include, Rails.application.routes.url_helpers) + end - def document_root - "#{Rails.root}/public" + DEFAULTS.merge!(:document_root => "#{Rails.root}/public", :url_options => default_url_options) + super(options) end + end - class BigSitemapMerb < BigSitemap def initialize(options={}) + raise "No Merb Environment loaded" unless defined? Merb require 'extlib' - super - end - def document_root - "#{Merb.root}/public" + DEFAULTS.merge!(:document_root => "#{Merb.root}/public") + super(options) end def table_name(model) Extlib::Inflection.tableize(model.to_s) end -end \ No newline at end of file +end