lib/html2rss/item_extractor.rb in html2rss-0.3.0 vs lib/html2rss/item_extractor.rb in html2rss-0.3.1

- old
+ new

@@ -1,12 +1,17 @@ module Html2rss module ItemExtractor - TEXT = proc { |xml, options| xml.css(options['selector'])&.text&.strip } - ATTRIBUTE = proc { |xml, options| xml.css(options['selector']).attr(options['attribute']).to_s } + TEXT = proc { |xml, options| + element(xml, options)&.text&.strip&.split&.join(' ') + } + ATTRIBUTE = proc { |xml, options| + element(xml, options).attr(options['attribute']).to_s + } + HREF = proc { |xml, options| - href = xml.css(options['selector']).attr('href').to_s + href = element(xml, options).attr('href').to_s path, query = href.split('?') if href.start_with?('http') uri = URI(href) else @@ -16,10 +21,17 @@ end uri } - HTML = proc { |xml, options| xml.css(options['selector']).to_s } + HTML = proc { |xml, options| + element(xml, options).to_s + } + STATIC = proc { |_xml, options| options['static'] } CURRENT_TIME = proc { |_xml, _options| Time.new } + + def self.element(xml, options) + options['selector'] ? xml.css(options['selector']) : xml + end end end