Sha256: 42dbf65340df6218ede707745255fa0a39b470b272d1d2226e51249cf86f57bd

Contents?: true

Size: 872 Bytes

Versions: 2

Compression:

Stored size: 872 Bytes

Contents

module Govspeak
  # Collects the link targets (href values) of the anchors found in the HTML
  # rendering of a document.
  #
  # Anchors without an href, mailto links and in-page fragment links
  # (href starting with "#") are ignored.
  class LinkExtractor
    # @param document [#to_html] object whose HTML output is scanned for anchors
    # @param website_root [String, nil] when given, it is prepended to every
    #   root-relative href (one that starts with a single "/")
    def initialize(document, website_root: nil)
      @document = document
      @website_root = website_root
    end

    # Extracts the links on first use and returns the memoised result afterwards.
    #
    # @return [Array<String>] the non-blank hrefs found in the document
    def call
      @links ||= extract_links
    end

  private

    attr_reader :document, :website_root

    # Maps every relevant anchor to its href and drops blank values.
    # NOTE(review): +blank?+ is not core Ruby; presumably provided by
    # ActiveSupport loaded elsewhere in the gem - confirm.
    def extract_links
      document_anchors.
        map { |link| extract_href_from_link(link) }.
        reject(&:blank?)
    end

    # Returns the href of +link+, made absolute with +website_root+ when the
    # href is a root-relative path. A missing href yields an empty string.
    #
    # @param link [#[]] anchor node (anything answering +['href']+)
    # @return [String]
    def extract_href_from_link(link)
      href = link['href'] || ''
      return href unless website_root && root_relative_path?(href)

      "#{website_root}#{href}"
    end

    # True for paths such as "/foo", false for protocol-relative URLs such as
    # "//example.com/foo": those already name their own host, so prefixing
    # the website root would produce a broken URL.
    def root_relative_path?(href)
      href.start_with?('/') && !href.start_with?('//')
    end

    # Anchors that carry an href, excluding mailto links and fragment links.
    # The mailto filter matches the scheme including its colon so that
    # ordinary relative links whose path merely begins with "mailto"
    # (e.g. "mailto-help.html") are not discarded.
    def document_anchors
      processed_govspeak.css('a[href]').css('a:not([href^="mailto:"])').css('a:not([href^="#"])')
    end

    # Parses the document's HTML as a fragment of a UTF-8 encoded
    # Nokogiri HTML document.
    def processed_govspeak
      doc = Nokogiri::HTML::Document.new
      doc.encoding = "UTF-8"

      doc.fragment(document.to_html)
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
govspeak-5.6.0 lib/govspeak/link_extractor.rb
govspeak-5.5.0 lib/govspeak/link_extractor.rb