lib/rdf/microdata/reader.rb in rdf-microdata-0.1.3 vs lib/rdf/microdata/reader.rb in rdf-microdata-0.2.0
- old
+ new
@@ -2,13 +2,16 @@
module RDF::Microdata
##
# A Microdata parser in Ruby
#
- # Based on processing rules described here:
- # @see http://dev.w3.org/html5/md/
+ # Based on the processing rules of the HTML Microdata specification, amended as follows:
+ # * property generation from tokens now uses the associated @itemtype as the basis for generation
+ # * implicit triples are not generated, only those with @item*
+ # * @datetime values are scanned lexically to find appropriate datatype
#
+ # @see http://dev.w3.org/html5/md/
# @author [Gregg Kellogg](http://kellogg-assoc.com/)
class Reader < RDF::Reader
format Format
XHTML = "http://www.w3.org/1999/xhtml"
URL_PROPERTY_ELEMENTS = %w(a area audio embed iframe img link object source track video)
@@ -37,12 +40,10 @@
# whether to canonicalize parsed literals
# @option options [Boolean] :intern (true)
# whether to intern all parsed URIs
# @option options [#to_s] :base_uri (nil)
# the base URI to use when resolving relative URIs
- # @option options [Boolean] :rdf_terms (false)
- # Generate URIs for itemprop terms based on namespace of itemtype
# @option options [Array] :debug
# Array to place debug messages
# @return [reader]
# @yield [reader] `self`
# @yieldparam [RDF::Reader] reader
@@ -161,23 +162,10 @@
add_debug(base_el, "parse_whole_doc: base='#{base}'")
else
base = RDF::URI("")
end
- ##
- # 1. If the title element is not null, then generate the following triple:
- #
- # subject: the document's current address
- # predicate: http://purl.org/dc/terms/title
- # object: the concatenation of the data of all the child text nodes of the title element,
- # in tree order, as a plain literal, with the language information set from
- # the language of the title element, if it is not unknown.
- doc.css('html>head>title').each do |title|
- lang = title.attribute('language')
- add_triple(title, base, RDF::DC.title, title.inner_text)
- end
-
# 2. For each a, area, and link element in the Document, run these substeps:
#
# * If the element does not have a rel attribute, then skip this element.
# * If the element does not have an href attribute, then skip this element.
# * If resolving the element's href attribute relative to the element is not successful,
@@ -336,19 +324,14 @@
add_debug(element, "gentrips(6.1.3): value=#{value.inspect}")
predicate = if name_uri.absolute?
name_uri
- elsif @options[:rdf_terms]
+ else
# Use the URI of the type to create URIs for @itemprop terms
add_debug(element, "gentrips: rdf_type=#{rdf_type}")
predicate = RDF::URI(rdf_type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1' + name))
- elsif !name.include?(':')
- s = type.to_s
- s += '%20' unless s[-1,1] == ':'
- s += name
- RDF::MD[s.gsub('#', '%23')]
end
add_debug(element, "gentrips(6.1.5): predicate=#{predicate}")
add_triple(element, subject, predicate, value) if predicate
end
@@ -476,10 +459,15 @@
when %w(a area link).include?(element.name)
uri(element.attribute('href'), element.base)
when %w(object).include?(element.name)
uri(element.attribute('data'), element.base)
when %w(time).include?(element.name) && element.has_attribute?('datetime')
- RDF::Literal::DateTime.new(element.attribute('datetime'))
+ # Lexically scan the value and assign the matching datatype; otherwise, leave it untyped
+ v = element.attribute('datetime').to_s
+ datatype = %w(Date Time DateTime).map {|t| RDF::Literal.const_get(t)}.detect do |dt|
+ v.match(dt::GRAMMAR)
+ end || RDF::Literal
+ datatype.new(v)
else
RDF::Literal.new(element.text, :language => element.language)
end
end
\ No newline at end of file