lib/brief/document/content_extractor.rb in brief-1.6.1 vs lib/brief/document/content_extractor.rb in brief-1.7.0
- old
+ new
@@ -10,11 +10,11 @@
def model_class
document.model_class
end
def content_schema_attributes
- model_class.definition.content_schema.attributes
+ model_class.definition.content_schema.attributes.symbolize_keys!
end
def extracted_content_data
me = self
content_schema_attributes.keys.reduce({}.to_mash) do |memo, attr|
@@ -23,42 +23,70 @@
memo
end
end
def respond_to?(meth)
- content_schema_attributes.key?(meth) || super
+ supports_extraction?(meth) || super
end
+ def extraction_rule_for(attribute)
+ content_schema_attributes.fetch(attribute.to_sym, nil)
+ end
+
+ def selector_for(attribute)
+ extraction_rule_for(attribute).first
+ end
+
+ def supports_extraction?(attribute)
+ content_schema_attributes.key?(attribute.to_sym)
+ end
+
def method_missing(meth, *_args, &_block)
- if settings = content_schema_attributes.fetch(meth, nil)
- if settings.args.length == 1 && settings.args.first.is_a?(String)
- selector = settings.args.first
- matches = document.css(selector)
+ return super unless supports_extraction?(meth)
+ rule = ExtractionRule.new(extraction_rule_for(meth))
+ rule.apply_to(document)
+ end
- if matches.length > 1
- selector.match(/first-of-type/) ? matches.first.text : matches.map(&:text)
- else
- matches.first.try(:text)
- end
- elsif settings.args.first.to_s.match(/code/i) && (settings.args.last.serialize rescue nil)
- selector = settings.args.first
- opts = settings.args.last
+ class ExtractionRule
+ attr_reader :rule, :args
- matches = document.css(selector)
+ def initialize(rule)
+ @rule = rule
+ @args = rule.args
+ end
- val = if matches.length > 1
- selector.match(/first-of-type/) ? matches.first.text : matches.map(&:text)
- else
- matches.first.try(:text)
- end
+ def options
+ args[1] || {}.to_mash
+ end
- if val && opts.serialize == :yaml
- return (YAML.load(val) rescue {}).to_mash
- end
+ def deserialize?
+ !!(options.serialize.present? && options.serialize)
+ end
- if val && opts.serialize == :json
- return (JSON.parse(val) rescue {}).to_mash
- end
+ def format
+ options.serialize.to_sym
+ end
+
+ def selector
+ args.first if args.first.is_a?(String)
+ end
+
+ def apply_to(document)
+ raise 'Must specify a selector' unless selector
+
+ extracted = document.css(selector)
+
+ return nil if extracted.length == 0
+
+ case
+ when deserialize? && format == :json
+ (JSON.parse(extracted.text.to_s) rescue {}).to_mash
+ when deserialize? && format == :yaml
+ (YAML.load(extracted.text.to_s) rescue {}).to_mash
+ when selector.match(/first-of-type/) && extracted.length > 0
+ extracted.first.text
+ else
+ extracted.map(&:text)
end
end
end
end
end