# frozen_string_literal: true

module Html2rss
  module ItemExtractors
    ##
    # Returns the value of the +href+ attribute as an absolute URL.
    # The extracted value is sanitized and then resolved against the
    # channel's URL, so a relative +href+ gets the channel's URL prepended.
    # When the element has no +href+ attribute, +nil+ is returned.
    #
    # Imagine this +a+ HTML element with a +href+ attribute:
    #
    #     <a href="/posts/latest-findings">...</a>
    #
    # YAML usage example:
    #
    #    channel:
    #      url: http://blog-without-a-feed.example.com
    #      ...
    #    selectors:
    #      link:
    #        selector: a
    #        extractor: href
    #
    # Would return:
    #    'http://blog-without-a-feed.example.com/posts/latest-findings'
    class Href
      # The available options for the href (attribute) extractor.
      Options = Struct.new('HrefOptions', :selector, :channel, keyword_init: true)

      ##
      # Initializes the Href extractor.
      #
      # @param xml [Nokogiri::XML::Element]
      # @param options [Options]
      def initialize(xml, options)
        @options = options
        @element = ItemExtractors.element(xml, options.selector)
        # Stringify only a present attribute. A plain `.to_s` would turn a
        # missing attribute (nil) into "", which made the nil guard in #get
        # unreachable.
        @href = @element.attr('href')&.to_s
      end

      ##
      # Retrieves and returns the normalized absolute URL.
      #
      # @return [String, nil] the absolute URL, or nil when the element has
      #   no +href+ attribute
      def get
        return nil unless @href

        sanitized_href = Html2rss::Utils.sanitize_url(@href)
        Html2rss::Utils.build_absolute_url_from_relative(sanitized_href, @options.channel.url)
      end
    end
  end
end