Sha256: 337893ce097e71d9adaf8e4dea9c798ad267e2de3e58a539e437cdc8199d7f36

Contents?: true

Size: 1.43 KB

Versions: 1

Compression:

Stored size: 1.43 KB

Contents

# encoding: utf-8

require 'set'

module ::Nanoc::Extra
  # Collects the link targets referenced by a list of HTML files.
  #
  # For each file, the `href` attribute of every `a` element and the `src`
  # attribute of every `img` element is gathered. Protocol-relative URLs are
  # rewritten to use `http://`, fragments are stripped, and the result is
  # optionally narrowed to only internal or only external targets.
  class LinkCollector
    # @param filenames [#each] paths of the HTML files to scan; every element
    #   is passed to `File.read`
    # @param mode [Symbol, nil] `:external` to keep only external targets,
    #   `:internal` to keep only non-external targets, `nil` to keep all
    #
    # @raise [ArgumentError] if `mode` is not `:internal`, `:external` or `nil`
    def initialize(filenames, mode = nil)
      Nanoc::Extra::JRubyNokogiriWarner.check_and_warn

      @filenames = filenames
      @filter =
        case mode
        when nil
          ->(_h) { true }
        when :external
          ->(h) { external_href?(h) }
        when :internal
          ->(h) { !external_href?(h) }
        else
          raise ArgumentError, 'Expected mode argument to be :internal, :external or nil'
        end
    end

    # Builds a mapping from each collected target to the files referencing it.
    #
    # @return [Hash{String => Set<String>}] target => set of filenames in
    #   which that target was found
    def filenames_per_href
      # Loaded lazily so that merely loading this file does not need Nokogiri.
      require 'nokogiri'

      filenames_per_href = {}
      @filenames.each do |filename|
        hrefs_in_file(filename).each do |href|
          filenames_per_href[href] ||= Set.new
          filenames_per_href[href] << filename
        end
      end
      filenames_per_href
    end

    # Tells whether a target is external: either protocol-relative (`//host`)
    # or starting with a lowercase scheme such as `http:` or `mailto:`.
    #
    # @param href [String] the target to test
    #
    # @return [Integer, nil] the match position (truthy) when the target is
    #   external, `nil` otherwise
    def external_href?(href)
      href =~ %r{^(\/\/|[a-z\-]+:)}
    end

    # Extracts the normalised, filtered link targets from a single HTML file.
    #
    # @param filename [String] path of the HTML file to parse
    #
    # @return [Enumerable<String>] the distinct targets accepted by the mode
    #   filter chosen at construction time
    def hrefs_in_file(filename)
      hrefs_in_file = Set.new
      doc = Nokogiri::HTML(::File.read(filename))

      # Elements lacking the attribute yield nil; skip them so that the string
      # normalisation below never receives nil (e.g. anchors without `href`,
      # images without `src`).
      doc.css('a').each { |e| hrefs_in_file << e[:href] unless e[:href].nil? }
      doc.css('img').each { |e| hrefs_in_file << e[:src] unless e[:src].nil? }

      # Convert protocol-relative urls
      # e.g. //example.com => http://example.com
      hrefs_in_file.map! { |href| href.gsub(/^\/\//, 'http://') }

      # Strip fragment
      hrefs_in_file.map! { |href| href.gsub(/#.*$/, '') }

      hrefs_in_file.select(&@filter)
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
nanoc-3.7.5 lib/nanoc/extra/link_collector.rb