Sha256: 09a04d1b899005e32494796479a784238a855824a38ac56a5e3f185669061be0

Contents?: true

Size: 871 Bytes

Versions: 13

Compression:

Stored size: 871 Bytes

Contents

module Govspeak
  # Collects the link targets (+href+ values) of the anchors found in the HTML
  # rendering of a document.
  #
  # Anchors without an +href+, and anchors whose +href+ starts with "mailto"
  # or "#", are ignored. Blank hrefs are dropped from the result.
  class LinkExtractor
    # @param document [#to_html] object whose rendered HTML is scanned
    # @param website_root [String, nil] when given, prepended to every href
    #   that is a root-relative path (starts with a single "/")
    def initialize(document, website_root: nil)
      @document = document
      @website_root = website_root
    end

    # Extracts the links once and reuses the result on later calls.
    #
    # @return [Array<String>] the non-blank hrefs of the matching anchors
    def call
      @call ||= extract_links
    end

  private

    attr_reader :document, :website_root

    # NOTE(review): +blank?+ is not core Ruby; presumably supplied by
    # ActiveSupport loaded elsewhere in the gem -- confirm.
    def extract_links
      document_anchors.
        map { |link| extract_href_from_link(link) }.
        reject(&:blank?)
    end

    # Returns the anchor's href, made absolute with +website_root+ when the
    # href is a root-relative path. A missing href is treated as "".
    def extract_href_from_link(link)
      href = link['href'] || ''
      return href unless website_root && root_relative_path?(href)

      "#{website_root}#{href}"
    end

    # True for "/path" but not for "//host/path": the latter is a
    # protocol-relative URL that already names its host, so prefixing it with
    # the website root would produce a broken link.
    def root_relative_path?(href)
      href.start_with?('/') && !href.start_with?('//')
    end

    # The selectors are applied as separate, successive +css+ calls on purpose;
    # they are not merged into one selector so the query semantics stay exactly
    # as they were.
    def document_anchors
      processed_govspeak.css('a[href]').css('a:not([href^="mailto"])').css('a:not([href^="#"])')
    end

    # Parses the document's HTML as a fragment of a fresh document whose
    # encoding is set to UTF-8.
    def processed_govspeak
      doc = Nokogiri::HTML::Document.new
      doc.encoding = "UTF-8"

      doc.fragment(document.to_html)
    end
  end
end

Version data entries

13 entries across 13 versions & 1 rubygem

Version Path
govspeak-6.5.0 lib/govspeak/link_extractor.rb
govspeak-6.4.0 lib/govspeak/link_extractor.rb
govspeak-6.3.0 lib/govspeak/link_extractor.rb
govspeak-6.2.1 lib/govspeak/link_extractor.rb
govspeak-6.2.0 lib/govspeak/link_extractor.rb
govspeak-6.1.1 lib/govspeak/link_extractor.rb
govspeak-6.1.0 lib/govspeak/link_extractor.rb
govspeak-6.0.0 lib/govspeak/link_extractor.rb
govspeak-5.9.1 lib/govspeak/link_extractor.rb
govspeak-5.9.0 lib/govspeak/link_extractor.rb
govspeak-5.8.0 lib/govspeak/link_extractor.rb
govspeak-5.7.1 lib/govspeak/link_extractor.rb
govspeak-5.7.0 lib/govspeak/link_extractor.rb