Sha256: aaae866d71f6871b0939273487753ab41354cdf2785fc01a27cc8352fb034926

Contents?: true

Size: 1.38 KB

Versions: 11

Compression:

Stored size: 1.38 KB

Contents

# encoding: utf-8

require 'set'

module ::Nanoc::Extra

  # Collects the link targets found in a set of HTML files.
  #
  # Link targets are the `href` attributes of `a` elements and the `src`
  # attributes of `img` elements. Depending on the mode given at
  # construction time, all targets, only external targets, or only internal
  # targets are reported.
  class LinkCollector

    # @param [Enumerable<String>] filenames Paths of the HTML files to scan
    #
    # @param [Symbol, nil] mode `:external` to keep only external hrefs,
    #   `:internal` to keep only non-external hrefs, or `nil` to keep all
    #
    # @raise [ArgumentError] if mode is not `:internal`, `:external` or `nil`
    def initialize(filenames, mode=nil)
      @filenames = filenames
      @filter =
        case mode
        when nil
          lambda { |h| true }
        when :external
          lambda { |h| external_href?(h) }
        when :internal
          lambda { |h| !external_href?(h) }
        else
          raise ArgumentError, 'Expected mode argument to be :internal, :external or nil'
        end
    end

    # Scans every file passed to the constructor and groups the files by the
    # hrefs they contain.
    #
    # @return [Hash<String, Set<String>>] A hash mapping each collected href
    #   onto the set of filenames in which it occurs
    def filenames_per_href
      # Loaded lazily so that nokogiri is only needed once links are
      # actually collected.
      require 'nokogiri'

      @filenames.each_with_object({}) do |filename, result|
        hrefs_in_file(filename).each do |href|
          (result[href] ||= Set.new) << filename
        end
      end
    end

    # @param [String] href The href to classify
    #
    # @return [Boolean] true if the href is protocol-relative (`//host/...`)
    #   or starts with a scheme (`http:`, `mailto:`, ...), false otherwise
    def external_href?(href)
      # \A rather than ^ so that only the very start of the string counts;
      # ^ would also match after an embedded newline.
      !!(href =~ %r{\A(//|[a-z\-]+:)})
    end

    # Extracts the normalized, de-duplicated link targets of a single file
    # and applies the mode filter chosen at construction time.
    #
    # Normalization converts protocol-relative URLs to `http://` URLs and
    # removes the fragment. Elements lacking the relevant attribute (`a`
    # without `href`, `img` without `src`) are skipped.
    #
    # @param [String] filename Path of the HTML file to scan
    #
    # @return [Enumerable<String>] The hrefs in the file that pass the filter
    def hrefs_in_file(filename)
      doc = Nokogiri::HTML(::File.read(filename))

      candidates =
        doc.css('a').map { |e| e[:href] } +
        doc.css('img').map { |e| e[:src] }

      hrefs = Set.new
      candidates.each do |href|
        # A missing attribute yields nil; there is nothing to collect.
        next if href.nil?

        # Convert protocol-relative urls
        # e.g. //example.com => http://example.com
        href = href.sub(%r{\A//}, 'http://')

        # Strip fragment (everything from the first # onwards)
        href = href.sub(/#.*\z/m, '')

        hrefs << href
      end

      hrefs.select(&@filter)
    end

  end

end

Version data entries

11 entries across 11 versions & 1 rubygems

Version Path
nanoc-3.6.7 lib/nanoc/extra/link_collector.rb
nanoc-3.6.6 lib/nanoc/extra/link_collector.rb
nanoc-3.6.5 lib/nanoc/extra/link_collector.rb
nanoc-3.6.4 lib/nanoc/extra/link_collector.rb
nanoc-3.6.3 lib/nanoc/extra/link_collector.rb
nanoc-3.6.2 lib/nanoc/extra/link_collector.rb
nanoc-3.6.1 lib/nanoc/extra/link_collector.rb
nanoc-3.6.0 lib/nanoc/extra/link_collector.rb
nanoc-3.5.0 lib/nanoc/extra/link_collector.rb
nanoc-3.5.0b2 lib/nanoc/extra/link_collector.rb
nanoc-3.5.0b1 lib/nanoc/extra/link_collector.rb