lib/html2rss/item.rb in html2rss-0.3.3 vs lib/html2rss/item.rb in html2rss-0.4.0
- old
+ new
@@ -1,32 +1,31 @@
require 'faraday'
require 'faraday_middleware'
require 'open-uri'
require 'nokogiri'
-require_relative 'item_extractor'
+require_relative 'item_extractors'
require_relative 'attribute_post_processors'
module Html2rss
class Item
- attr_reader :xml, :config
-
# Stores the XML node and the config on the instance.
# @param xml the node later handed to the extractor in method_missing
# @param config object answering the calls made in this class
#   (options, attribute_names, categories)
def initialize(xml, config)
@xml = xml
@config = config
end
+ private_class_method :new
+
# Reports dynamically resolved attributes as supported: true when method_name is
# included in config.attribute_names, otherwise defers to the default implementation.
# NOTE(review): method_missing looks names up with method_name.to_s while this compares
# the raw method_name (a Symbol when Ruby calls it) — confirm attribute_names holds symbols.
def respond_to_missing?(method_name, _include_private = false)
config.attribute_names.include?(method_name) || super
end
# Resolves an otherwise undefined method call as a configured item attribute.
# Falls back to super when config.options returns nothing for the name; otherwise
# extracts the raw value from xml and passes it through post_process.
# Any arguments given to the call are ignored (*_args).
def method_missing(method_name, *_args)
attribute_config = config.options(method_name.to_s)
return super unless attribute_config
- extractor = attribute_config['extractor'] || 'text'
- proc = ItemExtractor.const_get extractor.upcase.to_sym
- value = proc.call(xml, attribute_config)
+ extractor = ItemExtractors.get_extractor(attribute_config['extractor'])
+ value = extractor.new(xml, attribute_config).get
# A missing 'post_process' key yields false, which post_process treats as
# "return the value unchanged".
post_process(value, attribute_config.fetch('post_process', false))
end
def available_attributes
@@ -36,35 +35,45 @@
# True when the string forms of title and description are not both empty.
# title and description are presumably resolved through method_missing — no explicit
# definitions are visible in this part of the file.
def valid?
[title.to_s, description.to_s].join('') != ''
end
+ ##
# Resolves every entry of config.categories through method_missing, removes duplicates
# and removes values whose string form is empty.
# NOTE(review): the added version uses map!/uniq!/keep_if, which modify the array returned
# by config.categories in place (the removed version built new arrays). If that array is
# shared config state rather than a fresh copy, a second call would re-resolve already
# resolved values — confirm against the config implementation.
+ # @return [Array]
def categories
- config.categories.map(&method(:method_missing)).uniq.keep_if { |category| category.to_s != '' }
+ categories = config.categories
+ categories.map!(&method(:method_missing))
+ categories.uniq!
+ categories.keep_if { |category| category.to_s != '' }
end
+ ##
# Fetches url, parses the response body as HTML with Nokogiri and builds one instance
# per node matched by the CSS selector config.selector('items').
# @param url address to request
# @param config supplies the request headers, the items selector and is passed to each instance
+ # @return [Array]
def self.from_url(url, config)
- connection = Faraday.new(url: url, headers: config.headers) { |faraday|
- faraday.use FaradayMiddleware::FollowRedirects
- faraday.adapter Faraday.default_adapter
- }
-
- page = Nokogiri::HTML(connection.get.body)
+ page = Nokogiri::HTML(get_body_from_url(url, config.headers))
page.css(config.selector('items')).map do |xml_item|
new xml_item, config
end
end
private
- def post_process(value, post_process_options = [])
- return value unless post_process_options
# Performs a GET request on url with the given headers, using the FollowRedirects
# middleware, and returns the response body.
# NOTE(review): a bare `private` does not affect `def self.` methods; the
# private_class_method call below is what hides this method.
+ def self.get_body_from_url(url, headers)
+ Faraday.new(url: url, headers: headers) do |faraday|
+ faraday.use FaradayMiddleware::FollowRedirects
+ faraday.adapter Faraday.default_adapter
+ end.get.body
+ end
+ private_class_method :get_body_from_url
- post_process_options = [post_process_options] unless post_process_options.is_a?(Array)
+ attr_reader :xml, :config
- post_process_options.each do |options|
- value = AttributePostProcessors.get_processor(options)
- .new(value, options, self)
# Runs value through each configured post processor in order, feeding every result
# into the next one, and returns the final value.
# post_process_options may be a single options entry or a (possibly nested) array of
# entries; a falsy value returns value untouched.
# Each entry is indexed with 'name' to select its processor — assumes hash-like entries.
+ def post_process(value, post_process_options)
+ return value unless post_process_options
+
+ [post_process_options].flatten.each do |options|
+ value = AttributePostProcessors.get_processor(options['name'])
+ .new(value, options: options, item: self, config: @config)
.get
end
value
end