Sha256: 9eef511613b43b96a47108e830842ba75c3bcb103e2a9ade23da1a60cb043048

Contents?: true

Size: 1.45 KB

Versions: 6

Compression:

Stored size: 1.45 KB

Contents

# encoding: utf-8

require 'set'

module ::Nanoc::Extra

  # Collects the link targets found in a list of HTML files: the `href`
  # attribute of `a` elements and the `src` attribute of `img` elements.
  # The collected targets can optionally be restricted to external or to
  # internal ones.
  class LinkCollector

    # @param [#each] filenames The paths of the HTML files to scan
    #
    # @param [Symbol, nil] mode `:external` to keep only hrefs for which
    #   {#external_href?} is true, `:internal` to keep only hrefs for which
    #   it is false, or `nil` to keep every href
    #
    # @raise [ArgumentError] if mode is not `:internal`, `:external` or `nil`
    def initialize(filenames, mode = nil)
      Nanoc::Extra::JRubyNokogiriWarner.check_and_warn

      @filenames = filenames
      @filter =
        case mode
        when nil
          lambda { |_href| true }
        when :external
          lambda { |href| external_href?(href) }
        when :internal
          lambda { |href| !external_href?(href) }
        else
          raise ArgumentError, 'Expected mode argument to be :internal, :external or nil'
        end
    end

    # Scans every file given to the constructor and groups the filenames by
    # the hrefs they contain.
    #
    # @return [Hash<String, Set<String>>] A hash mapping each collected href
    #   to the set of filenames in which it occurs
    def filenames_per_href
      # Loaded lazily so that merely loading this file does not need Nokogiri.
      require 'nokogiri'

      filenames_per_href = {}
      @filenames.each do |filename|
        hrefs_in_file(filename).each do |href|
          (filenames_per_href[href] ||= Set.new) << filename
        end
      end
      filenames_per_href
    end

    # @param [String] href The href to check
    #
    # @return [Boolean] true if the href starts with `//` or with a
    #   lowercase scheme followed by a colon (e.g. `http:`, `mailto:`),
    #   false otherwise
    def external_href?(href)
      !!(href =~ %r{^(\/\/|[a-z\-]+:)})
    end

    # Extracts the link targets from a single HTML file. Protocol-relative
    # targets are rewritten to `http://`, fragments are stripped, and
    # duplicates are removed before the mode filter is applied.
    #
    # @param [String] filename The path of the HTML file to parse
    #
    # @return [Enumerable<String>] The normalised hrefs that pass the filter
    #   selected by the mode given to the constructor
    def hrefs_in_file(filename)
      # This method is public and may be called without going through
      # #filenames_per_href, so make sure Nokogiri is loaded here as well.
      require 'nokogiri'

      doc = Nokogiri::HTML(::File.read(filename))

      hrefs_in_file = Set.new
      [['a', :href], ['img', :src]].each do |selector, attribute|
        doc.css(selector).each do |element|
          target = element[attribute]

          # Elements lacking the attribute (e.g. named anchors, or images
          # without a src) have nothing to collect.
          next if target.nil?

          # Convert protocol-relative urls
          # e.g. //example.com => http://example.com
          target = target.gsub(/^\/\//, 'http://')

          # Strip fragment
          hrefs_in_file << target.gsub(/#.*$/, '')
        end
      end

      hrefs_in_file.select(&@filter)
    end

  end

end

Version data entries

6 entries across 6 versions & 1 rubygems

Version Path
nanoc-3.7.3 lib/nanoc/extra/link_collector.rb
nanoc-3.7.2 lib/nanoc/extra/link_collector.rb
nanoc-3.7.1 lib/nanoc/extra/link_collector.rb
nanoc-3.7.0 lib/nanoc/extra/link_collector.rb
nanoc-3.6.11 lib/nanoc/extra/link_collector.rb
nanoc-3.6.10 lib/nanoc/extra/link_collector.rb