lib/big_sitemap/builder.rb in big_sitemap-0.5.1 vs lib/big_sitemap/builder.rb in big_sitemap-0.8.1

- old
+ new

@@ -1,113 +1,122 @@ -require 'builder' +require 'fileutils' require 'zlib' class BigSitemap - class Builder < Builder::XmlMarkup - NAMESPACE = 'http://www.sitemaps.org/schemas/sitemap/0.9' + class Builder MAX_URLS = 50000 + HEADER_ATTRIBUTES = { + 'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9', + 'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance", + 'xsi:schemaLocation' => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" + } def initialize(options) - @gzip = options.delete(:gzip) - @max_urls = options.delete(:max_urls) || MAX_URLS - @index = options.delete(:index) - @paths = [] - @parts = 0 + @gzip = options.delete(:gzip) + @max_urls = options.delete(:max_urls) || MAX_URLS + @type = options.delete(:type) + @paths = [] + @parts = options.delete(:start_part_id) || 0 + @custom_part_nr = options.delete(:partial_update) - if @filename = options.delete(:filename) - options[:target] = _get_writer - end + @filename = options.delete(:filename) + @current_filename = nil + @tmp_filename = nil + @target = _get_writer - super(options) - + @level = 0 @opened_tags = [] _init_document end - def add_url!(url, time = nil, frequency = nil, priority = nil) - _rotate if @max_urls == @urls + def add_url!(url, time = nil, frequency = nil, priority = nil, part_nr = nil) + _rotate(part_nr) if @max_urls == @urls - tag!(@index ? 'sitemap' : 'url') do - loc url - # W3C format is the subset of ISO 8601 - lastmod(time.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00')) unless time.nil? - changefreq(frequency) unless frequency.nil? - priority(priority) unless priority.nil? - end + _open_tag 'url' + tag! 'loc', url + tag! 'lastmod', time.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if time + tag! 'changefreq', frequency if frequency + tag! 'priority', priority if priority + _close_tag 'url' + @urls += 1 end + def paths! + @paths + end + def close! _close_document target!.close if target!.respond_to?(:close) + File.delete(@current_filename) if File.exists?(@current_filename) + File.rename(@tmp_filename, @current_filename) end - def paths! - @paths + def target! + @target end private def _get_writer - if @filename - filename = @filename.dup - filename << "_#{@parts}" if @parts > 0 - filename << '.xml' - filename << '.gz' if @gzip - _open_writer(filename) - else - target! - end + filename = @filename.dup + filename << "_#{@parts}" if @parts > 0 + filename << '.xml' + filename << '.gz' if @gzip + _open_writer(filename) end def _open_writer(filename) - file = File.open(filename, 'w+') + @current_filename = filename + @tmp_filename = filename + ".tmp" @paths << filename - @gzip ? Zlib::GzipWriter.new(file) : file + file = ::File.open(@tmp_filename, 'w+') + @gzip ? ::Zlib::GzipWriter.new(file) : file end - def _init_document + def _init_document( name = 'urlset', attrs = HEADER_ATTRIBUTES) @urls = 0 - instruct! - _open_tag(@index ? 'sitemapindex' : 'urlset', :xmlns => NAMESPACE) + target!.print '<?xml version="1.0" encoding="UTF-8"?>' + _newline + _open_tag name, attrs end - def _rotate + def _rotate(part_nr = nil) # write out the current document and start writing into a new file close! - @parts += 1 + @parts = (part_nr && @custom_part_nr) ? part_nr : @parts + 1 @target = _get_writer _init_document end - # add support for: - # xml.open_foo!(attrs) - # xml.close_foo! - def method_missing(method, *args, &block) - if method.to_s =~ /^(open|close)_(.+)!$/ - operation, name = $1, $2 - name = "#{name}:#{args.shift}" if Symbol === args.first - - if 'open' == operation - _open_tag(name, args.first) - else - _close_tag(name) - end - else - super - end - end - # opens a tag, bumps up level but doesn't require a block - def _open_tag(name, attrs) + def _open_tag(name, attrs = {}) _indent _start_tag(name, attrs) _newline @level += 1 @opened_tags << name end + def _start_tag(name, attrs = {}) + attrs = attrs.map { |attr,value| %Q( #{attr}="#{value}") }.join('') + target!.print "<#{name}#{attrs}>" + end + + def tag!(name, content, attrs = {}) + _indent + _start_tag(name, attrs) + target!.print content.to_s.gsub('&', '&amp;') + _end_tag(name) + _newline + end + + def _end_tag(name) + target!.print "</#{name}>" + end + # closes a tag block by decreasing the level and inserting a close tag def _close_tag(name) @opened_tags.pop @level -= 1 _indent @@ -118,7 +127,42 @@ def _close_document for name in @opened_tags.reverse _close_tag(name) end end + + def _indent + return if @gzip + target!.print " " * @level + end + + def _newline + return if @gzip + target!.puts '' + end end + + class IndexBuilder < Builder + def _init_document(name = 'sitemapindex', attrs = {'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9'}) + attrs.merge('xmlns:geo' => "http://www.google.com/geo/schemas/sitemap/1.0") + super(name, attrs) + end + + def add_url!(url, time = nil) + _open_tag 'sitemap' + tag! 'loc', url + tag! 'lastmod', time.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if time + _close_tag 'sitemap' + end + end + + class GeoBuilder < Builder + #_build_geo if @geo + + # def _build_geo + # geo :geo do + # geo :format, 'kml' + # end + # end + end + end