lib/html2rss/item_extractor.rb in html2rss-0.3.0 vs lib/html2rss/item_extractor.rb in html2rss-0.3.1
- old
+ new
@@ -1,12 +1,17 @@
module Html2rss
module ItemExtractor
- TEXT = proc { |xml, options| xml.css(options['selector'])&.text&.strip }
- ATTRIBUTE = proc { |xml, options| xml.css(options['selector']).attr(options['attribute']).to_s }
+ TEXT = proc { |xml, options|
+ element(xml, options)&.text&.strip&.split&.join(' ')
+ }
+ ATTRIBUTE = proc { |xml, options|
+ element(xml, options).attr(options['attribute']).to_s
+ }
+
HREF = proc { |xml, options|
- href = xml.css(options['selector']).attr('href').to_s
+ href = element(xml, options).attr('href').to_s
path, query = href.split('?')
if href.start_with?('http')
uri = URI(href)
else
@@ -16,10 +21,17 @@
end
uri
}
- HTML = proc { |xml, options| xml.css(options['selector']).to_s }
+ HTML = proc { |xml, options|
+ element(xml, options).to_s
+ }
+
# Returns the value stored under the 'static' key of the options;
# the document argument is ignored.
STATIC = proc do |_xml, options|
  options['static']
end
# Returns a new Time for the moment of the call; both arguments are ignored.
CURRENT_TIME = proc do |_xml, _options|
  Time.new
end
+
+ def self.element(xml, options)
+ options['selector'] ? xml.css(options['selector']) : xml
+ end
end
end