Sha256: 184a830e1a7690612bba11269bb7014c66e2dd402298cb4b7f2743b68b5c214b
Contents?: true
Size: 1.79 KB
Versions: 1
Compression:
Stored size: 1.79 KB
Contents
# Extracts named attributes (title, image, feeds, ...) from the HTML document
# behind a URL.
#
# Subclasses claim the URLs they handle with `match` and add or override
# attributes with `define_attribute`. `Earl::Scraper.for` picks the first
# registered subclass whose pattern matches the URL and falls back to this
# base class, which supplies the default attributes declared at the bottom
# of the class body.
class Earl::Scraper
  # Scraper classes registered through `match`, in registration order.
  # A class variable, so the base class and every subclass share one list.
  @@registry = []

  class << self
    # @return [Regexp, nil] the pattern passed to `match`, or nil if `match`
    #   was never called on this class
    attr_reader :regexp

    # @return [Hash, nil] attribute name => extractor block, holding only the
    #   attributes declared directly on this class; nil until the first
    #   `define_attribute` call
    attr_reader :attributes

    # Declares which URLs this scraper class handles and registers the class.
    #
    # @param regexp [Regexp] pattern tested against the URL by `for`
    # @return [Array<Class>] the registry after this class was appended
    def match(regexp)
      @regexp = regexp
      register(self)
    end

    # Declares (or replaces) an attribute extractor on this class.
    #
    # @param name [Object] attribute key, looked up later by `#attribute`
    # @yieldparam doc [Object, nil] the scraper's `#response`
    # @return [Proc] the stored block
    def define_attribute(name, &block)
      @attributes ||= {}
      @attributes[name] = block
    end

    # Builds a scraper for +url+: the first registered class whose pattern
    # matches wins, otherwise the base Earl::Scraper is used.
    #
    # @param url [String] URL to match against the registered patterns
    # @param earl_source [Object, nil] object responding to `uri_response`;
    #   optional, mirroring `#initialize`
    # @return [Earl::Scraper] instance of the selected scraper class
    def for(url, earl_source = nil)
      scraper_class = @@registry.find { |klass| klass.regexp.match(url) } || Earl::Scraper
      scraper_class.new(url, earl_source)
    end

    private

    # Appends +scraper_klass+ to the shared registry.
    def register(scraper_klass)
      @@registry << scraper_klass
    end
  end

  # @return [Object, nil] the source object given to the constructor
  attr_reader :earl_source

  # @param url [String] the URL this scraper was created for
  # @param earl_source [Object, nil] object responding to `uri_response`
  def initialize(url, earl_source = nil)
    @url = url
    @earl_source = earl_source
  end

  # Parsed document for the source, memoized after the first successful parse.
  #
  # @return [Object, nil] result of `Nokogiri::HTML(earl_source.uri_response)`,
  #   or nil when no earl_source was given
  def response
    @response ||= earl_source && Nokogiri::HTML(earl_source.uri_response)
  end

  # Evaluates the extractor registered under +name+ against `#response`.
  # The extractor is called even when `#response` is nil.
  #
  # @param name [Object] attribute key
  # @return [Object, nil] the extractor's result, or nil for an unknown name
  def attribute(name)
    return unless has_attribute?(name)

    attributes[name].call(response)
  end

  # All extractors visible to this instance: the tables of every class from
  # Earl::Scraper down to the instance's own class, merged so that the most
  # specific class wins.
  #
  # A class that declared no attributes of its own contributes nothing
  # (its table is nil), so it still inherits everything from its ancestors.
  #
  # @return [Hash] attribute name => extractor block (a fresh hash per call)
  def attributes
    lineage = self.class.ancestors.grep(Class).take_while { |klass| klass <= Earl::Scraper }
    lineage.reverse.inject({}) do |merged, klass|
      merged.merge(klass.attributes || {})
    end
  end

  # @param name [Object] attribute key
  # @return [Boolean] whether an extractor for +name+ is declared on this
  #   instance's class or inherited from an ancestor scraper class
  def has_attribute?(name)
    attributes.key?(name)
  end

  # Default attributes available to every scraper. Each block receives the
  # parsed document and returns nil when the element it looks for is absent.

  # Text content of the `title` element.
  define_attribute :title do |doc|
    node = doc.at('title')
    node && node.content
  end

  # `src` of the `img` element returned by `doc.at`.
  define_attribute :image do |doc|
    node = doc.at('img')
    node && node['src']
  end

  # `content` of the description meta tag.
  define_attribute :description do |doc|
    node = doc.at("meta[name='description']")
    node && node['content']
  end

  # `href` of the RSS feed link.
  define_attribute :rss_feed do |doc|
    node = doc.at("link[type='application/rss+xml']")
    node && node['href']
  end

  # `href` of the Atom feed link.
  define_attribute :atom_feed do |doc|
    node = doc.at("link[type='application/atom+xml']")
    node && node['href']
  end
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
earl-1.0.0 | lib/earl/scraper.rb |