lib/html2rss/item_extractors/text.rb in html2rss-0.9.0 vs lib/html2rss/item_extractors/text.rb in html2rss-0.10.0

- old
+ new

@@ -1,12 +1,14 @@
+# frozen_string_literal: true
+
 module Html2rss
   module ItemExtractors
     ##
-    # Return the text of the attribute. This is the default extractor used,
+    # Return the text content of the attribute. This is the default extractor used,
     # when no extractor is explicitly given.
     #
-    # Imagine this HTML structure:
+    # Example HTML structure:
     #
     # <p>Lorem <b>ipsum</b> dolor ...</p>
     #
     # YAML usage example:
     #
@@ -16,17 +18,27 @@
     # extractor: text
     #
     # Would return:
     # 'Lorem ipsum dolor ...'
     class Text
+      # The available options for the text extractor.
+      Options = Struct.new('TextOptions', :selector, keyword_init: true)
+
+      ##
+      # Initializes the Text extractor.
+      #
+      # @param xml [Nokogiri::XML::Element]
+      # @param options [Options]
       def initialize(xml, options)
-        @element = ItemExtractors.element(xml, options)
+        @element = ItemExtractors.element(xml, options.selector)
       end
 
       ##
-      # @return [String]
+      # Retrieves and returns the text content of the element.
+      #
+      # @return [String] The text content.
       def get
-        @element.text.to_s.strip.split.join(' ')
+        @element.text.to_s.strip.gsub(/\s+/, ' ')
       end
     end
   end
 end