require 'builder'

# A LinkSet provisions a bunch of links to sitemap files.  It also writes the index file
# which lists all the sitemap files written.
module SitemapGenerator
  class LinkSet
    @@requires_finalization_opts = [:filename, :sitemaps_path, :sitemaps_namer, :sitemaps_host]
    @@new_location_opts = [:filename, :sitemaps_path, :sitemaps_namer]

    attr_reader :default_host, :sitemaps_path, :filename
    attr_accessor :verbose, :yahoo_app_id, :include_root, :include_index, :sitemaps_host

    # Add links to the link set by evaluating the block.  The block should
    # contains calls to sitemap methods like:
    # * +add+   - Add a link to the current sitemap
    # * +group+ - Start a new group of sitemaps
    #
    # == Options
    #
    # Any option supported by +new+ can be passed.  The options will be
    # set on the instance using the accessor methods.  This is provided mostly
    # as a convenience.
    #
    # In addition to the options to +new+, the following options are supported:
    # * <tt>:finalize</tt> - The sitemaps are written as they get full and at the end
    # of the block.  Pass +false+ as the value to prevent the sitemap or sitemap index
    # from being finalized.  Default is +true+.
    def create(opts={}, &block)
      @sitemap_index = nil if @sitemap_index && @sitemap_index.finalized? && !@protect_index
      @sitemap = nil if @sitemap && @sitemap.finalized?
      set_options(opts)
      start_time = Time.now if @verbose
      interpreter.eval(:yield_sitemap => @yield_sitemap || SitemapGenerator.yield_sitemap?, &block)
      finalize!
      end_time = Time.now if @verbose
      puts sitemap_index.stats_summary(:time_taken => end_time - start_time) if @verbose
      self
    end

    # Dreprecated.  Use create.
    def add_links(&block)
      @yield_sitemap = true
      create(&block)
      @yield_sitemap = false
    end

    # Constructor
    #
    # == Options:
    # * <tt>:default_host</tt> - host including protocol to use in all sitemap links
    #   e.g. http://en.google.ca
    #
    # * <tt>:public_path</tt> - Full or relative path to the directory to write sitemaps into.
    #   Defaults to the <tt>public/</tt> directory in your application root directory or
    #   the current working directory.
    #
    # * <tt>:sitemaps_host</tt> - host (including protocol) to use in links to the sitemaps.  Useful if your sitemaps
    #   are hosted o different server e.g. 'http://amazon.aws.com/'
    #
    # * <tt>:sitemaps_path</tt> - path fragment within public to write sitemaps
    #   to e.g. 'en/'.  Sitemaps are written to <tt>public_path</tt> + <tt>sitemaps_path</tt>
    #
    # * <tt>:filename</tt> - symbol giving the base name for files (default <tt>:sitemap</tt>).
    #   The sitemap names are generated like "#{filename}1.xml.gz", "#{filename}2.xml.gz"
    #   and the index name is like "#{filename}_index.xml.gz".
    #
    # * <tt>:sitemaps_namer</tt> - A +SitemapNamer+ instance for generating the sitemap names.
    #
    # * <tt>:include_root</tt> - whether to include the root url i.e. '/' in each group of sitemaps.
    #   Default is true.
    #
    # * <tt>:include_index</tt> - whether to include the sitemap index URL in each group of sitemaps.
    #   Default is true.
    #
    # * <tt>:verbose</tt> - If +true+, output a summary line for each sitemap and sitemap
    #   index that is created.  Default is +false+.
    def initialize(options={})
      options.reverse_merge!({
        :include_root => true,
        :include_index => true,
        :filename => :sitemap,
        :verbose => false
      })
      options.each_pair { |k, v| instance_variable_set("@#{k}".to_sym, v) }

      # If an index is passed in, protect it from modification.
      # Sitemaps can be added to the index but nothing else can be changed.
      if options[:sitemap_index]
        @protect_index = true
      end
    end

    # Add a link to a Sitemap.  If a new Sitemap is required, one will be created for
    # you.
    #
    # link - string link e.g. '/merchant', '/article/1' or whatever.
    # options - see README.
    #   host - host for the link, defaults to your <tt>default_host</tt>.
    def add(link, options={})
      add_default_links if !@added_default_links
      sitemap.add(link, options.reverse_merge!(:host => @default_host))
    rescue SitemapGenerator::SitemapFullError
      finalize_sitemap!
      retry
    rescue SitemapGenerator::SitemapFinalizedError
      @sitemap = sitemap.new
      retry
    end

    # Create a new group of sitemaps.  Returns a new LinkSet instance with options set on it.
    #
    # All groups share this LinkSet's sitemap index, which is not modified by any of the options
    # passed to +group+.
    #
    # === Options
    # Any of the options to LinkSet.new.  Except for <tt>:public_path</tt> which is shared
    # by all groups.
    #
    # The current options are inherited by the new group of sitemaps.  The only exceptions
    # being <tt>:include_index</tt> and <tt>:include_root</tt> which default to +false+.
    #
    # Pass a block to add links to the new LinkSet.  If you pass a block the sitemaps will
    # be finalized when the block returns.
    #
    # If you are not changing any of the location settings like <tt>filename<tt>,
    # <tt>sitemaps_path</tt>, <tt>sitemaps_host</tt> or <tt>sitemaps_namer</tt>
    # the current sitemap will be used in the group.  All of the options you have
    # specified which affect the way the links are generated will still be applied
    # for the duration of the group.
    def group(opts={}, &block)
      @created_group = true
      original_opts = opts.dup

      if (@@requires_finalization_opts & original_opts.keys).empty?
        # If no new filename or path is specified reuse the default sitemap file.
        # A new location object will be set on it for the duration of the group.
        opts[:sitemap] = sitemap
      elsif original_opts.key?(:sitemaps_host) && (@@new_location_opts & original_opts.keys).empty?
        # If no location options are provided we are creating the next sitemap in the
        # current series, so finalize and inherit the namer.
        finalize_sitemap!
        opts[:sitemaps_namer] = sitemaps_namer
      end

      opts = options_for_group(opts)
      @group = SitemapGenerator::LinkSet.new(opts)
      if opts.key?(:sitemap)
        # If the group is sharing the current sitemap, set the
        # new location options on the location object.
        @original_location = @sitemap.location.dup
        @sitemap.location.merge!(@group.sitemap_location)
        if block_given?
          @group.interpreter.eval(:yield_sitemap => @yield_sitemap || SitemapGenerator.yield_sitemap?, &block)
          @sitemap.location.merge!(@original_location)
        end
      elsif block_given?
        @group.interpreter.eval(:yield_sitemap => @yield_sitemap || SitemapGenerator.yield_sitemap?, &block)
        @group.finalize_sitemap!
      end
      @group
    end

    # Ping search engines.
    #
    # @see http://en.wikipedia.org/wiki/Sitemap_index
    def ping_search_engines
      require 'open-uri'

      sitemap_index_url = CGI.escape(sitemap_index.location.url)
      search_engines = {
        :google         => "http://www.google.com/webmasters/sitemaps/ping?sitemap=#{sitemap_index_url}",
        :yahoo          => "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=#{sitemap_index_url}&appid=#{yahoo_app_id}",
        :ask            => "http://submissions.ask.com/ping?sitemap=#{sitemap_index_url}",
        :bing           => "http://www.bing.com/webmaster/ping.aspx?siteMap=#{sitemap_index_url}",
        :sitemap_writer => "http://www.sitemapwriter.com/notify.php?crawler=all&url=#{sitemap_index_url}"
      }

      puts "\n" if verbose
      search_engines.each do |engine, link|
        next if engine == :yahoo && !self.yahoo_app_id
        begin
          Timeout::timeout(10) {
            open(link)
          }
          puts "Successful ping of #{engine.to_s.titleize}" if verbose
        rescue Timeout::Error, StandardError => e
          puts "Ping failed for #{engine.to_s.titleize}: #{e.inspect} (URL #{link})" if verbose
        end
      end

      if !self.yahoo_app_id && verbose
        puts "\n"
        puts <<-END.gsub(/^\s+/, '')
          To ping Yahoo you require a Yahoo AppID.  Add it to your config/sitemap.rb with:

          SitemapGenerator::Sitemap.yahoo_app_id = "my_app_id"

          For more information see http://developer.yahoo.com/search/siteexplorer/V1/updateNotification.html
        END
      end
    end

    # Return a count of the total number of links in all sitemaps
    def link_count
      sitemap_index.total_link_count
    end

    # Return the host to use in links to the sitemap files.  This defaults to your
    # +default_host+.
    def sitemaps_host
      @sitemaps_host || @default_host
    end

    # Lazy-initialize a sitemap instance when it's accessed
    def sitemap
      @sitemap ||= SitemapGenerator::Builder::SitemapFile.new(sitemap_location)
    end

    # Lazy-initialize a sitemap index instance when it's accessed
    def sitemap_index
      @sitemap_index ||= SitemapGenerator::Builder::SitemapIndexFile.new(sitemap_index_location)
    end

    def finalize!
      finalize_sitemap!
      finalize_sitemap_index!
    end

    protected

    # Set each option on this instance using accessor methods.  This will affect
    # both the sitemap and the sitemap index.
    def set_options(opts={})
      opts.each_pair do |key, value|
        send("#{key}=", value)
      end
    end

    # Given +opts+, return a hash of options prepped for creating a new group from this LinkSet.
    # If <tt>:public_path</tt> is present in +opts+ it is removed because groups cannot
    # change the public path.
    def options_for_group(opts)
      opts.delete(:public_path)
      opts.reverse_merge!(
        :include_index => false,
        :include_root => false,
        :sitemap_index => sitemap_index
      )

      # Reverse merge the current settings
      current_settings = [
        :include_root,
        :include_index,
        :sitemaps_path,
        :public_path,
        :sitemaps_host,
        :verbose,
        :default_host
      ].inject({}) do |hash, key|
        if value = instance_variable_get(:"@#{key}")
          hash[key] = value
        end
        hash
      end
      opts.reverse_merge!(current_settings)
      opts
    end

    # Add default links if those options are turned on.  Record the fact that we have done so
    # in an instance variable.
    def add_default_links
      sitemap.add('/', :lastmod => Time.now, :changefreq => 'always', :priority => 1.0, :host => @default_host) if include_root
      sitemap.add(sitemap_index, :lastmod => Time.now, :changefreq => 'always', :priority => 1.0) if include_index
      @added_default_links = true
    end

    # Finalize a sitemap by including it in the index and outputting a summary line.
    # Do nothing if it has already been finalized.
    #
    # Don't finalize if the sitemap is empty and a group has been created.  The reason
    # being that the group will have written out its sitemap.
    #
    # Add the default links if they have not been added yet and no groups have been created.
    # If the default links haven't been added we know that the sitemap is empty,
    # because they are added on the first call to add().  This ensure that if the
    # block passed to create() is empty the default links are still included in the
    # sitemap.
    def finalize_sitemap!
      add_default_links if !@added_default_links && !@created_group
      return if sitemap.finalized? || sitemap.empty? && @created_group
      sitemap_index.add(sitemap)
      puts sitemap.summary if verbose
    end

    # Finalize a sitemap index and output a summary line.  Do nothing if it has already
    # been finalized.
    def finalize_sitemap_index!
      return if @protect_index || sitemap_index.finalized?
      sitemap_index.finalize!
      puts sitemap_index.summary if verbose
    end

    # Return the interpreter linked to this instance.
    def interpreter
      require 'sitemap_generator/interpreter'
      @interpreter ||= SitemapGenerator::Interpreter.new(:link_set => self)
    end

    module LocationHelpers
      public

      # Set the host name, including protocol, that will be used by default on each
      # of your sitemap links.  You can pass a different host in your options to `add`
      # if you need to change it on a per-link basis.
      def default_host=(value)
        @default_host = value
        update_location_info(:host, value)
      end

      # Set the public_path.  This path gives the location of your public directory.
      # The default is the public/ directory in your Rails root.  Or if Rails is not
      # found, it defaults to public/ in the current directory (of the process).
      #
      # Example: 'tmp/' if you don't want to generate in public for some reason.
      #
      # Set to nil to use the current directory.
      def public_path=(value)
        @public_path = Pathname.new(value.to_s)
        @public_path = SitemapGenerator.app.root + @public_path if @public_path.relative?
        update_location_info(:public_path, @public_path)
        @public_path
      end

      # Return a Pathname with the full path to the public directory
      def public_path
        @public_path ||= self.send(:public_path=, 'public/')
      end

      # Set the sitemaps_path.  This path gives the location to write sitemaps to
      # relative to your public_path.
      # Example: 'sitemaps/' to generate your sitemaps in 'public/sitemaps/'.
      def sitemaps_path=(value)
        @sitemaps_path = value
        update_location_info(:sitemaps_path, value)
      end

      # Set the host name, including protocol, that will be used on all links to your sitemap
      # files.  Useful when the server that hosts the sitemaps is not on the same host as
      # the links in the sitemap.
      def sitemaps_host=(value)
        @sitemaps_host = value
        update_location_info(:host, value)
      end

      # Set the filename base to use when generating sitemaps and sitemap indexes.
      # The index name will be +value+ with <tt>_index.xml.gz</tt> appended.
      # === Example
      # <tt>filename = :sitemap</tt>
      def filename=(value)
        @filename = value
        self.sitemaps_namer = SitemapGenerator::SitemapNamer.new(@filename)
        self.sitemap_index_namer = SitemapGenerator::SitemapIndexNamer.new("#{@filename}_index")
      end

      # Set the namer to use when generating SitemapFiles (does not apply to the
      # SitemapIndexFile)
      def sitemaps_namer=(value)
        @sitemaps_namer = value
        @sitemap.location[:namer] = value if @sitemap && !@sitemap.finalized?
      end

      # Return the current sitemaps namer object.  If it not set, looks for it on
      # the current sitemap and if there is no sitemap, creates a new one using
      # the current filename.
      def sitemaps_namer
        @sitemaps_namer ||= @sitemap && @sitemap.location.namer || SitemapGenerator::SitemapNamer.new(@filename)
      end

      # Set the namer to use when generating SitemapFiles (does not apply to the
      # SitemapIndexFile)
      def sitemap_index_namer=(value)
        @sitemap_index_namer = value
        @sitemap_index.location[:namer] = value if @sitemap_index && !@sitemap_index.finalized? && !@protect_index
      end

      def sitemap_index_namer
        @sitemap_index_namer ||= @sitemap_index && @sitemap_index.location.namer || SitemapGenerator::SitemapIndexNamer.new("#{@filename}_index")
      end

      # Return a new +SitemapLocation+ instance with the current options included
      def sitemap_location
        SitemapGenerator::SitemapLocation.new(
          :host => sitemaps_host,
          :namer => sitemaps_namer,
          :public_path => public_path,
          :sitemaps_path => @sitemaps_path
        )
      end

      # Return a new +SitemapIndexLocation+ instance with the current options included
      def sitemap_index_location
        SitemapGenerator::SitemapLocation.new(
          :host => sitemaps_host,
          :namer => sitemap_index_namer,
          :public_path => public_path,
          :sitemaps_path => @sitemaps_path
        )
      end

      protected

      # Update the given attribute on the current sitemap index and sitemap file location objects.
      # But don't create the index or sitemap files yet if they are not already created.
      def update_location_info(attribute, value, opts={})
        opts.reverse_merge!(:include_index => !@protect_index)
        @sitemap_index.location[attribute] = value if opts[:include_index] && @sitemap_index && !@sitemap_index.finalized?
        @sitemap.location[attribute] = value if @sitemap && !@sitemap.finalized?
      end
    end
    include LocationHelpers
  end
end