lib/rdf/microdata/reader.rb in rdf-microdata-0.1.3 vs lib/rdf/microdata/reader.rb in rdf-microdata-0.2.0

- old
+ new

@@ -2,13 +2,16 @@ module RDF::Microdata ## # An Microdata parser in Ruby # - # Based on processing rules described here: - # @see http://dev.w3.org/html5/md/ + # Based on processing rules, amended with the following: + # * property generation from tokens now uses the associated @itemtype as the basis for generation + # * implicit triples are not generated, only those with @item* + # * @datetime values are scanned lexically to find appropriate datatype # + # @see http://dev.w3.org/html5/md/ # @author [Gregg Kellogg](http://kellogg-assoc.com/) class Reader < RDF::Reader format Format XHTML = "http://www.w3.org/1999/xhtml" URL_PROPERTY_ELEMENTS = %w(a area audio embed iframe img link object source track video) @@ -37,12 +40,10 @@ # whether to canonicalize parsed literals # @option options [Boolean] :intern (true) # whether to intern all parsed URIs # @option options [#to_s] :base_uri (nil) # the base URI to use when resolving relative URIs - # @option options [Boolean] :rdf_terms (false) - # Generate URIs for itemprop terms based on namespace of itemtype # @option options [Array] :debug # Array to place debug messages # @return [reader] # @yield [reader] `self` # @yieldparam [RDF::Reader] reader @@ -161,23 +162,10 @@ add_debug(base_el, "parse_whole_doc: base='#{base}'") else base = RDF::URI("") end - ## - # 1. If the title element is not null, then generate the following triple: - # - # subject: the document's current address - # predicate: http://purl.org/dc/terms/title - # object: the concatenation of the data of all the child text nodes of the title element, - # in tree order, as a plain literal, with the language information set from - # the language of the title element, if it is not unknown. - doc.css('html>head>title').each do |title| - lang = title.attribute('language') - add_triple(title, base, RDF::DC.title, title.inner_text) - end - # 2. For each a, area, and link element in the Document, run these substeps: # # * If the element does not have a rel attribute, then skip this element. # * If the element does not have an href attribute, then skip this element. # * If resolving the element's href attribute relative to the element is not successful, @@ -336,19 +324,14 @@ add_debug(element, "gentrips(6.1.3): value=#{value.inspect}") predicate = if name_uri.absolute? name_uri - elsif @options[:rdf_terms] + else # Use the URI of the type to create URIs for @itemprop terms add_debug(element, "gentrips: rdf_type=#{rdf_type}") predicate = RDF::URI(rdf_type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1' + name)) - elsif !name.include?(':') - s = type.to_s - s += '%20' unless s[-1,1] == ':' - s += name - RDF::MD[s.gsub('#', '%23')] end add_debug(element, "gentrips(6.1.5): predicate=#{predicate}") add_triple(element, subject, predicate, value) if predicate end @@ -476,10 +459,15 @@ when %w(a area link).include?(element.name) uri(element.attribute('href'), element.base) when %w(object).include?(element.name) uri(element.attribute('data'), element.base) when %w(time).include?(element.name) && element.has_attribute?('datetime') - RDF::Literal::DateTime.new(element.attribute('datetime')) + # Lexically scan value and assign appropriate type, otherwise, leave untyped + v = element.attribute('datetime').to_s + datatype = %w(Date Time DateTime).map {|t| RDF::Literal.const_get(t)}.detect do |dt| + v.match(dt::GRAMMAR) + end || RDF::Literal + datatype.new(v) else RDF::Literal.new(element.text, :language => element.language) end end \ No newline at end of file