lib/mida/itemprop.rb in mida-0.2.0 vs lib/mida/itemprop.rb in mida-0.3.0
- old
+ new
@@ -1,23 +1,14 @@
require 'nokogiri'
require 'uri'
+require 'mida/itemscope'
module Mida
- # Module that parses itemprop elements
- module Itemprop
+ # Class that parses itemprop elements
+ class Itemprop
- # Returns a Hash representing the property.
- # Hash is of the form {'property name' => 'value'}
- # [element] The itemprop element to be parsed
- # [page_url] The url of the page, including the filename, used to form absolute urls
- def self.parse(element, page_url=nil)
- extract_property_names(element).each_with_object({}) do |name, memo|
- memo[name] = extract_property(element, page_url)
- end
- end
-
NON_TEXTCONTENT_ELEMENTS = {
'a' => 'href', 'area' => 'href',
'audio' => 'src', 'embed' => 'src',
'iframe' => 'src', 'img' => 'src',
'link' => 'href', 'meta' => 'content',
@@ -26,46 +17,80 @@
'video' => 'src'
}
URL_ATTRIBUTES = ['data', 'href', 'src']
+ # A Hash representing the properties.
+ # Hash is of the form {'property name' => 'value'}
+ attr_reader :properties
+
+ # Create a new Itemprop object
+ # [element] The itemprop element to be parsed
+ # [page_url] The url of the page, including filename, used to form
+ # absolute urls
+ def initialize(element, page_url=nil)
+ @element, @page_url = element, page_url
+ @properties = extract_properties
+ end
+
+ # Parse the element and return a hash representing the properties.
+ # Hash is of the form {'property name' => 'value'}
+ # [element] The itemprop element to be parsed
+ # [page_url] The url of the page, including filename, used to form
+ # absolute urls
+ def self.parse(element, page_url=nil)
+ self.new(element, page_url).properties
+ end
+
+ private
+ def extract_properties
+ prop_names = extract_property_names
+ prop_names.each_with_object({}) do |name, memo|
+ memo[name] = extract_property
+ end
+ end
+
# This returns an empty string if can't form a valid
# absolute url as per the Microdata spec.
- def self.make_absolute_url(url, page_url)
+ def make_absolute_url(url)
return url unless URI.parse(url).relative?
begin
- URI.parse(page_url).merge(url).to_s
+ URI.parse(@page_url).merge(url).to_s
rescue URI::Error
''
end
end
- def self.extract_property_names(itemprop)
- itemprop_attr = itemprop.attribute('itemprop')
+ def non_textcontent_element?(element)
+ NON_TEXTCONTENT_ELEMENTS.has_key?(element)
+ end
+
+ def url_attribute?(attribute)
+ URL_ATTRIBUTES.include?(attribute)
+ end
+
+ def extract_property_names
+ itemprop_attr = @element.attribute('itemprop')
itemprop_attr ? itemprop_attr.value.split() : []
end
- def self.extract_property_value(itemprop, page_url)
- element = itemprop.name
- if NON_TEXTCONTENT_ELEMENTS.has_key?(element)
+ def extract_property_value
+ element = @element.name
+ if non_textcontent_element?(element)
attribute = NON_TEXTCONTENT_ELEMENTS[element]
- value = itemprop.attribute(attribute).value
- (URL_ATTRIBUTES.include?(attribute)) ? make_absolute_url(value, page_url) : value
+ value = @element.attribute(attribute).value
+ url_attribute?(attribute) ? make_absolute_url(value) : value
else
- itemprop.inner_text
+ @element.inner_text.strip
end
end
- def self.extract_property(itemprop, page_url)
- if itemprop.attribute('itemscope')
- Mida::Item.new(itemprop, page_url)
+ def extract_property
+ if @element.attribute('itemscope')
+ Itemscope.new(@element, @page_url)
else
- extract_property_value(itemprop, page_url)
+ extract_property_value
end
end
- private_class_method :make_absolute_url, :extract_property_names
- private_class_method :extract_property_value, :extract_property
-
end
-
end