lib/big_sitemap/builder.rb in big_sitemap-0.5.1 vs lib/big_sitemap/builder.rb in big_sitemap-0.8.1
- old
+ new
@@ -1,113 +1,122 @@
-require 'builder'
+require 'fileutils'
require 'zlib'
class BigSitemap
- class Builder < Builder::XmlMarkup
- NAMESPACE = 'http://www.sitemaps.org/schemas/sitemap/0.9'
+ class Builder
MAX_URLS = 50000
+ HEADER_ATTRIBUTES = {
+ 'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9',
+ 'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
+ 'xsi:schemaLocation' => "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+ }
def initialize(options)
- @gzip = options.delete(:gzip)
- @max_urls = options.delete(:max_urls) || MAX_URLS
- @index = options.delete(:index)
- @paths = []
- @parts = 0
+ @gzip = options.delete(:gzip)
+ @max_urls = options.delete(:max_urls) || MAX_URLS
+ @type = options.delete(:type)
+ @paths = []
+ @parts = options.delete(:start_part_id) || 0
+ @custom_part_nr = options.delete(:partial_update)
- if @filename = options.delete(:filename)
- options[:target] = _get_writer
- end
+ @filename = options.delete(:filename)
+ @current_filename = nil
+ @tmp_filename = nil
+ @target = _get_writer
- super(options)
-
+ @level = 0
@opened_tags = []
_init_document
end
- def add_url!(url, time = nil, frequency = nil, priority = nil)
- _rotate if @max_urls == @urls
+ def add_url!(url, time = nil, frequency = nil, priority = nil, part_nr = nil)
+ _rotate(part_nr) if @max_urls == @urls
- tag!(@index ? 'sitemap' : 'url') do
- loc url
- # W3C format is the subset of ISO 8601
- lastmod(time.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00')) unless time.nil?
- changefreq(frequency) unless frequency.nil?
- priority(priority) unless priority.nil?
- end
+ _open_tag 'url'
+ tag! 'loc', url
+ tag! 'lastmod', time.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if time
+ tag! 'changefreq', frequency if frequency
+ tag! 'priority', priority if priority
+ _close_tag 'url'
+
@urls += 1
end
+ def paths!
+ @paths
+ end
+
def close!
_close_document
target!.close if target!.respond_to?(:close)
+ File.delete(@current_filename) if File.exists?(@current_filename)
+ File.rename(@tmp_filename, @current_filename)
end
- def paths!
- @paths
+ def target!
+ @target
end
private
def _get_writer
- if @filename
- filename = @filename.dup
- filename << "_#{@parts}" if @parts > 0
- filename << '.xml'
- filename << '.gz' if @gzip
- _open_writer(filename)
- else
- target!
- end
+ filename = @filename.dup
+ filename << "_#{@parts}" if @parts > 0
+ filename << '.xml'
+ filename << '.gz' if @gzip
+ _open_writer(filename)
end
def _open_writer(filename)
- file = File.open(filename, 'w+')
+ @current_filename = filename
+ @tmp_filename = filename + ".tmp"
@paths << filename
- @gzip ? Zlib::GzipWriter.new(file) : file
+ file = ::File.open(@tmp_filename, 'w+')
+ @gzip ? ::Zlib::GzipWriter.new(file) : file
end
- def _init_document
+ def _init_document( name = 'urlset', attrs = HEADER_ATTRIBUTES)
@urls = 0
- instruct!
- _open_tag(@index ? 'sitemapindex' : 'urlset', :xmlns => NAMESPACE)
+ target!.print '<?xml version="1.0" encoding="UTF-8"?>'
+ _newline
+ _open_tag name, attrs
end
- def _rotate
+ def _rotate(part_nr = nil)
# write out the current document and start writing into a new file
close!
- @parts += 1
+ @parts = (part_nr && @custom_part_nr) ? part_nr : @parts + 1
@target = _get_writer
_init_document
end
- # add support for:
- # xml.open_foo!(attrs)
- # xml.close_foo!
- def method_missing(method, *args, &block)
- if method.to_s =~ /^(open|close)_(.+)!$/
- operation, name = $1, $2
- name = "#{name}:#{args.shift}" if Symbol === args.first
-
- if 'open' == operation
- _open_tag(name, args.first)
- else
- _close_tag(name)
- end
- else
- super
- end
- end
-
# opens a tag, bumps up level but doesn't require a block
- def _open_tag(name, attrs)
+ def _open_tag(name, attrs = {})
_indent
_start_tag(name, attrs)
_newline
@level += 1
@opened_tags << name
end
+ def _start_tag(name, attrs = {})
+ attrs = attrs.map { |attr,value| %Q( #{attr}="#{value}") }.join('')
+ target!.print "<#{name}#{attrs}>"
+ end
+
+ def tag!(name, content, attrs = {})
+ _indent
+ _start_tag(name, attrs)
+ target!.print content.to_s.gsub('&', '&amp;')
+ _end_tag(name)
+ _newline
+ end
+
+ def _end_tag(name)
+ target!.print "</#{name}>"
+ end
+
# closes a tag block by decreasing the level and inserting a close tag
def _close_tag(name)
@opened_tags.pop
@level -= 1
_indent
@@ -118,7 +127,42 @@
def _close_document
for name in @opened_tags.reverse
_close_tag(name)
end
end
+
+ def _indent
+ return if @gzip
+ target!.print " " * @level
+ end
+
+ def _newline
+ return if @gzip
+ target!.puts ''
+ end
end
+
+ class IndexBuilder < Builder
+ def _init_document(name = 'sitemapindex', attrs = {'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9'})
+ attrs.merge('xmlns:geo' => "http://www.google.com/geo/schemas/sitemap/1.0")
+ super(name, attrs)
+ end
+
+ def add_url!(url, time = nil)
+ _open_tag 'sitemap'
+ tag! 'loc', url
+ tag! 'lastmod', time.utc.strftime('%Y-%m-%dT%H:%M:%S+00:00') if time
+ _close_tag 'sitemap'
+ end
+ end
+
+ class GeoBuilder < Builder
+ #_build_geo if @geo
+
+ # def _build_geo
+ # geo :geo do
+ # geo :format, 'kml'
+ # end
+ # end
+ end
+
end