Sha256: e0f5f064f832cf4f198ab7ec1bce947f5bfc112fd2dbad35daba6b1ec209b280

Contents?: true

Size: 871 Bytes

Versions: 4

Compression:

Stored size: 871 Bytes

Contents

module Govspeak
  # Collects the href values of the anchors in a rendered document.
  #
  # Anchors without an href, anchors whose href starts with "mailto", and
  # anchors whose href starts with "#" are skipped. When a website root is
  # supplied, site-root-relative hrefs ("/path") are prefixed with it.
  #
  # NOTE(review): relies on Nokogiri and on String#blank?, neither of which is
  # required in this file - presumably loaded elsewhere (blank? is not core
  # Ruby; it looks like the ActiveSupport extension). Confirm against the gem's
  # entry point.
  class LinkExtractor
    # @param document [#to_html] object whose #to_html output is parsed for links
    # @param website_root [String, nil] prefix for site-root-relative hrefs;
    #   when nil, hrefs are returned exactly as written
    def initialize(document, website_root: nil)
      @document = document
      @website_root = website_root
    end

    # Returns the extracted links, computing them on first use and reusing the
    # same array on later calls.
    #
    # @return [Array<String>] hrefs in the order the anchors were matched
    def call
      @call ||= extract_links
    end

  private

    attr_reader :document, :website_root

    # Maps every candidate anchor to its (possibly prefixed) href and drops
    # the blank ones.
    def extract_links
      document_anchors.
        map { |link| extract_href_from_link(link) }.
        reject(&:blank?)
    end

    # Returns the href of +link+, prefixed with the website root when the href
    # is site-root-relative and a root was configured. A missing href yields "".
    #
    # @param link [#[]] anything that answers link["href"]
    # @return [String]
    def extract_href_from_link(link)
      href = link["href"] || ""
      return href unless website_root && root_relative?(href)

      "#{website_root}#{href}"
    end

    # True for "/path" but not for "//host/path": a leading double slash is a
    # protocol-relative (network-path) reference that names another host, so
    # prefixing it with the website root would produce a bogus URL.
    def root_relative?(href)
      href.start_with?("/") && !href.start_with?("//")
    end

    # Anchors that carry an href, minus mail links and in-page fragment links.
    #
    # NOTE(review): the "mailto" prefix match has no trailing colon, so a
    # relative href that merely begins with those letters is excluded too.
    def document_anchors
      processed_govspeak.css("a[href]").css('a:not([href^="mailto"])').css('a:not([href^="#"])')
    end

    # Parses the document's HTML as a fragment of a fresh document whose
    # encoding is set to UTF-8.
    def processed_govspeak
      doc = Nokogiri::HTML::Document.new
      doc.encoding = "UTF-8"

      doc.fragment(document.to_html)
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygem

Version Path
govspeak-6.5.4 lib/govspeak/link_extractor.rb
govspeak-6.5.3 lib/govspeak/link_extractor.rb
govspeak-6.5.2 lib/govspeak/link_extractor.rb
govspeak-6.5.1 lib/govspeak/link_extractor.rb