Sha256: 9f53d5df91e417eddfe0d00e56e91f72649dd6dcabae7b9776739d00fc5a13c9

Contents?: true

Size: 871 Bytes

Versions: 44

Compression:

Stored size: 871 Bytes

Contents

module Govspeak
  # Collects the link targets (href attribute values) of the anchors found in
  # the HTML rendering of a document.
  #
  # Anchors whose href starts with "mailto" or "#" are skipped, and blank
  # hrefs are dropped from the result. When a website root is supplied,
  # root-relative paths ("/some/page") are turned into absolute URLs by
  # prefixing them with that root.
  class LinkExtractor
    # @param document [#to_html] object whose `to_html` output is scanned
    # @param website_root [String, nil] prefix applied to root-relative hrefs;
    #   when nil, hrefs are returned exactly as written
    def initialize(document, website_root: nil)
      @document = document
      @website_root = website_root
    end

    # Returns the extracted hrefs. The result is memoised, so the document is
    # only parsed on the first call.
    #
    # @return [Array<String>]
    def call
      @call ||= extract_links
    end

  private

    attr_reader :document, :website_root

    # Maps every candidate anchor to its (possibly absolutised) href and
    # discards the blank ones.
    def extract_links
      document_anchors
        .map { |link| extract_href_from_link(link) }
        .reject(&:blank?)
    end

    # Reads the href of a single anchor, falling back to an empty string when
    # the attribute is missing, and prefixes it with the website root when it
    # is a root-relative path.
    def extract_href_from_link(link)
      href = link["href"] || ""
      return href unless website_root && root_relative_path?(href)

      "#{website_root}#{href}"
    end

    # True for paths such as "/foo" that are relative to the site root.
    # Protocol-relative URLs ("//host/path") already name their own host, so
    # prefixing them with the website root would produce a broken URL.
    def root_relative_path?(href)
      href.start_with?("/") && !href.start_with?("//")
    end

    # Anchors that carry an href, excluding those starting with "mailto" and
    # in-page "#" fragment links.
    def document_anchors
      processed_govspeak.css("a[href]").css('a:not([href^="mailto"])').css('a:not([href^="#"])')
    end

    # Parses the document's HTML into a fragment owned by a UTF-8 encoded
    # Nokogiri document.
    def processed_govspeak
      doc = Nokogiri::HTML::Document.new
      doc.encoding = "UTF-8"

      doc.fragment(document.to_html)
    end
  end
end

Version data entries

44 entries across 44 versions & 1 rubygems

Version Path
govspeak-8.7.0 lib/govspeak/link_extractor.rb
govspeak-8.6.1 lib/govspeak/link_extractor.rb
govspeak-8.6.0 lib/govspeak/link_extractor.rb
govspeak-8.5.1 lib/govspeak/link_extractor.rb
govspeak-8.5.0 lib/govspeak/link_extractor.rb
govspeak-8.4.1 lib/govspeak/link_extractor.rb
govspeak-8.4.0 lib/govspeak/link_extractor.rb
govspeak-8.3.4 lib/govspeak/link_extractor.rb
govspeak-8.3.3 lib/govspeak/link_extractor.rb
govspeak-8.3.2 lib/govspeak/link_extractor.rb
govspeak-8.3.1 lib/govspeak/link_extractor.rb
govspeak-8.3.0 lib/govspeak/link_extractor.rb
govspeak-8.2.1 lib/govspeak/link_extractor.rb
govspeak-8.2.0 lib/govspeak/link_extractor.rb
govspeak-8.1.0 lib/govspeak/link_extractor.rb
govspeak-8.0.1 lib/govspeak/link_extractor.rb
govspeak-8.0.0 lib/govspeak/link_extractor.rb
govspeak-7.1.1 lib/govspeak/link_extractor.rb
govspeak-7.1.0 lib/govspeak/link_extractor.rb
govspeak-7.0.2 lib/govspeak/link_extractor.rb