reader.rb in rdf-microdata-0.2.0

- old
+ new

@@ -2,13 +2,16 @@
 
 module RDF::Microdata
   ##
   # A Microdata parser in Ruby
   #
-  # Based on processing rules described here:
-  # @see http://dev.w3.org/html5/md/
+  # Based on the HTML Microdata processing rules, amended as follows:
+  # * property generation from tokens now uses the associated @itemtype as the basis for generation
+  # * implicit triples are not generated, only those with @item*
+  # * @datetime values are scanned lexically to find appropriate datatype
   #
+  # @see http://dev.w3.org/html5/md/
   # @author [Gregg Kellogg](http://kellogg-assoc.com/)
   class Reader < RDF::Reader
     format Format
     XHTML = "http://www.w3.org/1999/xhtml"
     URL_PROPERTY_ELEMENTS = %w(a area audio embed iframe img link object source track video)
@@ -37,12 +40,10 @@
     #   whether to canonicalize parsed literals
     # @option options [Boolean]  :intern       (true)
     #   whether to intern all parsed URIs
     # @option options [#to_s]    :base_uri     (nil)
     #   the base URI to use when resolving relative URIs
-    # @option options [Boolean]  :rdf_terms     (false)
-    #   Generate URIs for itemprop terms based on namespace of itemtype
     # @option options [Array] :debug
     #   Array to place debug messages
     # @return [reader]
     # @yield  [reader] `self`
     # @yieldparam  [RDF::Reader] reader
@@ -161,23 +162,10 @@
         add_debug(base_el, "parse_whole_doc: base='#{base}'")
       else
         base = RDF::URI("")
       end
       
-      ##
-      # 1. If the title element is not null, then generate the following triple:
-      #
-      #   subject:  the document's current address
-      #   predicate:  http://purl.org/dc/terms/title
-      #   object:  the concatenation of the data of all the child text nodes of the title element,
-      #            in tree order, as a plain literal, with the language information set from
-      #            the language of the title element, if it is not unknown. 
-      doc.css('html>head>title').each do |title|
-        lang = title.attribute('language')
-        add_triple(title, base, RDF::DC.title, title.inner_text)
-      end
-      
       # 2. For each a, area, and link element in the Document, run these substeps:
       #
       # * If the element does not have a rel attribute, then skip this element.
       # * If the element does not have an href attribute, then skip this element.
       # * If resolving the element's href attribute relative to the element is not successful,
@@ -336,19 +324,14 @@
           
           add_debug(element, "gentrips(6.1.3): value=#{value.inspect}")
 
           predicate = if name_uri.absolute?
             name_uri
-          elsif @options[:rdf_terms]
+          else
             # Use the URI of the type to create URIs for @itemprop terms
             add_debug(element, "gentrips: rdf_type=#{rdf_type}")
             predicate = RDF::URI(rdf_type.to_s.sub(/([\/\#])[^\/\#]*$/, '\1' + name))
-          elsif !name.include?(':')
-            s = type.to_s
-            s += '%20' unless s[-1,1] == ':'
-            s += name
-            RDF::MD[s.gsub('#', '%23')]
           end
           add_debug(element, "gentrips(6.1.5): predicate=#{predicate}")
           
           add_triple(element, subject, predicate, value) if predicate
         end
@@ -476,10 +459,15 @@
       when %w(a area link).include?(element.name)
         uri(element.attribute('href'), element.base)
       when %w(object).include?(element.name)
         uri(element.attribute('data'), element.base)
       when %w(time).include?(element.name) && element.has_attribute?('datetime')
-        RDF::Literal::DateTime.new(element.attribute('datetime'))
+        # Lexically scan the value and use the first datatype whose grammar matches; otherwise, leave it untyped
+        v = element.attribute('datetime').to_s
+        datatype = %w(Date Time DateTime).map {|t| RDF::Literal.const_get(t)}.detect do |dt|
+          v.match(dt::GRAMMAR)
+        end || RDF::Literal
+        datatype.new(v)
       else
         RDF::Literal.new(element.text, :language => element.language)
       end
     end
 
\ No newline at end of file