Sha256: ace364e8c1126a9efb4bfd35d081b6d12f1c02f9cb757cdd456ddb6a7be07f08

Contents?: true

Size: 1.98 KB

Versions: 4

Compression:

Stored size: 1.98 KB

Contents

require 'nokogiri'
require 'uri'

module Mida

  # Module that parses itemprop elements
  module Property

    # Elements whose property value is taken from an attribute instead of
    # the element's text content, mapped to the name of that attribute.
    NON_TEXTCONTENT_ELEMENTS = {
      'a' => 'href',        'area' => 'href',
      'audio' => 'src',     'embed' => 'src',
      'iframe' => 'src',    'img' => 'src',
      'link' => 'href',     'meta' => 'content',
      'object' => 'data',   'source' => 'src',
      'time' => 'datetime', 'track' => 'src',
      'video' => 'src'
    }.freeze

    # Attributes whose values are urls and so must be made absolute.
    URL_ATTRIBUTES = ['data', 'href', 'src'].freeze

    # Returns a Hash representing the property.
    # Hash is of the form {'property name' => 'value'}
    # An element naming several properties yields one entry per name, and
    # an element without an itemprop attribute yields an empty Hash.
    # [element] The itemprop element to be parsed
    # [page_url] The url of the page, including the filename, used to form absolute urls
    def self.parse(element, page_url=nil)
      # The value is extracted once per name so that each key owns its own
      # value object rather than sharing one between names.
      extract_property_names(element).each_with_object({}) do |name, hash|
        hash[name] = extract_property(element, page_url)
      end
    end

    # Returns +url+ unchanged if it is already absolute, otherwise resolves
    # it against +page_url+.
    # This returns an empty string if can't form a valid
    # absolute url as per the Microdata spec. That covers an unparsable
    # +url+ as well as a missing or unusable +page_url+.
    # [url] The url to make absolute
    # [page_url] The url of the page, used as the base for relative urls
    def self.make_absolute_url(url, page_url)
      # Both parses sit inside the rescued body so that a malformed +url+
      # gives '' instead of letting URI::InvalidURIError escape.
      return url unless URI.parse(url).relative?
      URI.parse(page_url).merge(url).to_s
    rescue URI::Error
      ''
    end

    # Returns an Array of the whitespace separated names held in the
    # element's itemprop attribute, or an empty Array if it has none.
    # [itemprop] The element to read the property names from
    def self.extract_property_names(itemprop)
      itemprop_attr = itemprop.attribute('itemprop')
      itemprop_attr ? itemprop_attr.value.split : []
    end

    # Returns the value of a property that is not itself an item.
    # For the elements in NON_TEXTCONTENT_ELEMENTS this is the value of the
    # mapped attribute, made absolute if it is a url attribute. For every
    # other element it is the element's text content.
    # [itemprop] The itemprop element to take the value from
    # [page_url] The url of the page, used to form absolute urls
    def self.extract_property_value(itemprop, page_url)
      attribute = NON_TEXTCONTENT_ELEMENTS[itemprop.name]
      return itemprop.inner_text unless attribute

      attribute_node = itemprop.attribute(attribute)
      unless attribute_node
        # A time element without a datetime attribute takes its value from
        # its text. Any other element missing its attribute has the empty
        # string as its value, rather than raising NoMethodError on nil.
        return attribute == 'datetime' ? itemprop.inner_text : ''
      end

      value = attribute_node.value
      URL_ATTRIBUTES.include?(attribute) ? make_absolute_url(value, page_url) : value
    end

    # Returns a Mida::Item if the element has an itemscope attribute,
    # otherwise the plain value from extract_property_value.
    # [itemprop] The itemprop element to be parsed
    # [page_url] The url of the page, used to form absolute urls
    def self.extract_property(itemprop, page_url)
      if itemprop.attribute('itemscope')
        Mida::Item.new(itemprop, page_url)
      else
        extract_property_value(itemprop, page_url)
      end
    end

  end

end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
mida-0.1.2 lib/mida/property.rb
mida-0.1.1 lib/mida/property.rb
mida-0.1.0 lib/mida/property.rb
mida-0.0.0 lib/mida/property.rb