Sha256: bce82b29d851b362ceb8fe0e2bb0486cb15e4d4fa46c7f2783c57ce3fa45bcce
Contents?: true
Size: 1.64 KB
Versions: 5
Compression:
Stored size: 1.64 KB
Contents
require 'nokogiri'

module Mida

  # Class that holds the extracted Microdata
  class Document

    # An Array of Mida::Item objects. These are all top-level
    # and hence not properties of other Items
    attr_reader :items

    # Create a new Microdata object
    #
    # [target] The string containing the html that you want to parse
    # [page_url] The url of target, used to form absolute urls. This must
    #            include the filename, e.g. index.html. It is handed
    #            unchanged to every Mida::Item that gets created.
    def initialize(target, page_url=nil)
      @doc = Nokogiri(target)
      @page_url = page_url
      @items = extract_items
    end

    # Returns an array of matching Mida::Item objects. Each item in +items+
    # is tested first, then the values of its properties are searched
    # recursively, so nested matches directly follow their parent in the
    # result.
    #
    # [vocabulary] A regexp to match the item types against
    # [items] The items to search. Defaults to the document's top-level items
    def search(vocabulary, items=@items)
      items.each_with_object([]) do |item, found_items|
        # Allows matching against empty string, otherwise couldn't match
        # as item.type can be nil
        if (item.type.nil? && "" =~ vocabulary) || (item.type =~ vocabulary)
          found_items << item
        end
        # concat appends in place rather than building a new array per item.
        # NOTE(review): assumes item.properties is Hash-like (responds to
        # #values) - confirm against Mida::Item.
        found_items.concat(search_values(item.properties.values, vocabulary))
      end
    end

  private

    # Builds a Mida::Item for every element that has an itemscope attribute
    # but no itemprop attribute, i.e. items that are not themselves a
    # property of another item.
    #
    # Always returns an Array so that #items and #search can rely on it.
    def extract_items
      items_doc = @doc.search('//*[@itemscope and not(@itemprop)]')
      # Defensive only: fall back to an empty Array instead of nil so the
      # default argument of #search never ends up calling nil.each.
      return [] unless items_doc

      items_doc.map { |item_doc| Item.new(item_doc, @page_url) }
    end

    # Walks a list of property values and collects every matching item.
    # Mida::Item values are searched via #search, nested Arrays are walked
    # recursively, and any other kind of value is ignored.
    #
    # [values] The property values to inspect
    # [vocabulary] A regexp to match the item types against
    def search_values(values, vocabulary)
      values.each_with_object([]) do |value, matches|
        case value
        when Mida::Item then matches.concat(search(vocabulary, [value]))
        when Array then matches.concat(search_values(value, vocabulary))
        end
      end
    end

  end

end
Version data entries
5 entries across 5 versions & 1 rubygems
Version | Path |
---|---|
mida-0.1.3 | lib/mida/document.rb |
mida-0.1.2 | lib/mida/document.rb |
mida-0.1.1 | lib/mida/document.rb |
mida-0.1.0 | lib/mida/document.rb |
mida-0.0.0 | lib/mida/document.rb |