lib/html2rss/item_extractor.rb in html2rss-0.0.1 vs lib/html2rss/item_extractor.rb in html2rss-0.1.0

- old
+ new

@@ -1,25 +1,25 @@
-require 'sanitize'
-
 module Html2rss
   module ItemExtractor
-    TEXT = proc { |xml, options| xml.css(options['selector'])&.text }
-    ATTRIBUTE = proc { |xml, options| xml.css(options['selector']).attr(options['attribute']) }
+    TEXT = proc { |xml, options| xml.css(options['selector'])&.text&.strip }
+    ATTRIBUTE = proc { |xml, options| xml.css(options['selector']).attr(options['attribute']).to_s }
 
     HREF = proc { |xml, options|
-      uri = URI(options['channel']['url'])
-      uri.path = xml.css(options['selector']).attr('href')
-      uri
-    }
+      href = xml.css(options['selector']).attr('href').to_s
+      path, query = href.split('?')
 
-    HTML = proc { |xml, options|
-      html = xml.css(options['selector']).to_s
+      if href.start_with?('http')
+        uri = URI(href)
+      else
+        uri = URI(options['channel']['url'])
+        uri.path = path.start_with?('/') ? path : "/#{path}"
+        uri.query = query
+      end
 
-      Sanitize.fragment(html, Sanitize::Config.merge(
-        Sanitize::Config::RELAXED,
-        add_attributes: {
-          'a' => { 'rel' => 'nofollow noopener noreferrer' }
-        }
-      ))
+      uri
     }
+
+    HTML = proc { |xml, options| xml.css(options['selector']).to_s }
+    STATIC = proc { |_xml, options| options['static'] }
+    CURRENT_TIME = proc { |_xml, _options| Time.new }
   end
 end