lib/html2rss/item_extractors/href.rb in html2rss-0.9.0 vs lib/html2rss/item_extractors/href.rb in html2rss-0.10.0

- old
+ new

# frozen_string_literal: true

module Html2rss
  module ItemExtractors
    ##
    # Returns the value of the +href+ attribute.
    # It always returns absolute URLs. If the extracted +href+ value is a
    # relative URL, it is resolved against the channel's URL.
    #
    # With the extractor configured as:
    #
    #    extractor: href
    #
    # Would return:
    #    'http://blog-without-a-feed.example.com/posts/latest-findings'
    class Href
      # The available options for the href (attribute) extractor.
      Options = Struct.new('HrefOptions', :selector, :channel, keyword_init: true)

      ##
      # Initializes the Href extractor.
      #
      # Looks up the element via ItemExtractors.element and stores the raw
      # value of its +href+ attribute as a String.
      #
      # @param xml [Nokogiri::XML::Element]
      # @param options [Options]
      def initialize(xml, options)
        @options = options
        @element = ItemExtractors.element(xml, options.selector)
        # `to_s` turns a missing attribute (nil) into an empty String, so
        # @href is always a String from here on.
        @href = @element.attr('href').to_s
      end

      ##
      # Retrieves and returns the normalized absolute URL.
      #
      # The stored href is passed through Utils.sanitize_url and then through
      # Utils.build_absolute_url_from_relative together with the channel URL.
      #
      # @return [String, nil] The absolute URL (whatever
      #   Utils.build_absolute_url_from_relative yields), or nil when the
      #   element has no +href+ value.
      def get
        # @href is never nil (see #initialize); an empty String is truthy in
        # Ruby, so emptiness has to be checked explicitly for this guard to
        # have any effect.
        return nil if @href.empty?

        sanitized_href = Html2rss::Utils.sanitize_url(@href)
        Html2rss::Utils.build_absolute_url_from_relative(sanitized_href, @options.channel.url)
      end
    end
  end
end