text.rb in html2rss-0.10.0

- old
+ new

@@ -1,12 +1,14 @@
+# frozen_string_literal: true
+
 module Html2rss
   module ItemExtractors
     ##
-    # Return the text of the attribute. This is the default extractor used,
+    # Return the text content of the attribute. This is the default extractor, used
     # when no extractor is explicitly given.
     #
-    # Imagine this HTML structure:
+    # Example HTML structure:
     #
     #     <p>Lorem <b>ipsum</b> dolor ...</p>
     #
     # YAML usage example:
     #
@@ -16,17 +18,27 @@
     #        extractor: text
     #
     # Would return:
     #    'Lorem ipsum dolor ...'
     class Text
+      # Keyword-init options; only +selector+. NOTE(review): the string name also defines Struct::TextOptions.
+      Options = Struct.new('TextOptions', :selector, keyword_init: true)
+
+      ##
+      # Looks up the element to extract from via ItemExtractors.element.
+      #
+      # @param xml [Nokogiri::XML::Element] node passed on to ItemExtractors.element
+      # @param options [Options] its +selector+ is passed on to ItemExtractors.element
       def initialize(xml, options)
-        @element = ItemExtractors.element(xml, options)
+        @element = ItemExtractors.element(xml, options.selector)
       end
 
       ##
-      # @return [String]
+      # Returns the element's text, stripped, with each whitespace run collapsed to a single space.
+      #
+      # @return [String] the whitespace-normalized text
       def get
-        @element.text.to_s.strip.split.join(' ')
+        @element.text.to_s.strip.gsub(/\s+/, ' ')
       end
     end
   end
 end