lib/rdf/rdfa/reader.rb in rdf-rdfa-0.3.8 vs lib/rdf/rdfa/reader.rb in rdf-rdfa-0.3.9

- old
+ new

@@ -19,12 +19,13 @@ # # [Nokogiri]: http://nokogiri.org/ # # Based on processing rules described here: # @see http://www.w3.org/TR/rdfa-syntax/#s_model RDFa 1.0 - # @see http://www.w3.org/TR/2011/WD-rdfa-core-20110331/ RDFa Core 1.1 - # @see http://www.w3.org/TR/2011/WD-xhtml-rdfa-20110331/ XHTML+RDFa 1.1 + # @see http://www.w3.org/TR/2011/WD-rdfa-core-20111215/ RDFa Core 1.1 + # @see http://www.w3.org/TR/2011/WD-xhtml-rdfa-20111215/ XHTML+RDFa 1.1 + # @see http://www.w3.org/TR/2011/WD-rdfa-in-html-20110525/ HTML+RDFa 1.1 # # @author [Gregg Kellogg](http://kellogg-assoc.com/) class Reader < RDF::Reader format Format include Expansion @@ -42,23 +43,44 @@ TERMorCURIEorAbsURIprop = { :"rdfa1.0" => [:curie], :"rdfa1.1" => [:term, :curie, :absuri], } + # This expression matches an NCName as defined in + # [XML-NAMES](http://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName) + # + # @see http://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName NC_REGEXP = Regexp.new( %{^ (?!\\\\u0301) # &#x301; is a non-spacing acute accent. # It is legal within an XML Name, but not as the first character. ( [a-zA-Z_] - | \\\\u[0-9a-fA-F] + | \\\\u[0-9a-fA-F]{4} ) + ( [0-9a-zA-Z_\.-/] + | \\\\u([0-9a-fA-F]{4}) + )* + $}, + Regexp::EXTENDED) + + # This expression matches a term as defined in + # [RDFA-CORE](http://www.w3.org/TR/2011/WD-rdfa-core-20111215/#s_terms) + # + # @see http://www.w3.org/TR/2011/WD-rdfa-core-20111215/#s_terms + TERM_REGEXP = Regexp.new( + %{^ + (?!\\\\u0301) # &#x301; is a non-spacing acute accent. + # It is legal within an XML Name, but not as the first character. 
+ ( [a-zA-Z_] + | \\\\u[0-9a-fA-F]{4} + ) ( [0-9a-zA-Z_\.-] | \\\\u([0-9a-fA-F]{4}) )* $}, Regexp::EXTENDED) - + # Host language # @attr [:xml1, :xhtml1, :xhtml5, :html4, :html5, :svg] attr_reader :host_language # Version @@ -251,25 +273,28 @@ when :rexml then REXML end self.extend(@implementation) detect_host_language_version(input, options) + + add_info(@doc, "version = #{@version}, host_language = #{@host_language}, library = #{@library}") + initialize_xml(input, options) rescue raise RDF::ReaderError.new($!.message) if (root.nil? && validate?) add_error(nil, "Empty document", RDF::RDFA.DocumentError) raise RDF::ReaderError, "Empty Document" end add_warning(nil, "Syntax errors:\n#{doc_errors}", RDF::RDFA.DocumentError) if !doc_errors.empty? && validate? # Section 4.2 RDFa Host Language Conformance # - # The Host Language may require the automatic inclusion of one or more default RDFa Profiles. + # The Host Language may require the automatic inclusion of one or more Initial Contexts @host_defaults = { - :vocabulary => nil, - :uri_mappings => {}, - :profiles => [], + :vocabulary => nil, + :uri_mappings => {}, + :initial_contexts => [], } if @version == :"rdfa1.0" # Add default term mappings @host_defaults[:term_mappings] = %w( @@ -278,17 +303,17 @@ ).inject({}) { |hash, term| hash[term] = RDF::XHV[term]; hash } end case @host_language when :xml1, :svg - @host_defaults[:profiles] = [XML_RDFA_PROFILE] - when :xhtml1, :xhtml5, :html4, :html5 - @host_defaults[:profiles] = [XML_RDFA_PROFILE, XHTML_RDFA_PROFILE] + @host_defaults[:initial_contexts] = [XML_RDFA_CONTEXT] + when :xhtml1 + @host_defaults[:initial_contexts] = [XML_RDFA_CONTEXT, XHTML_RDFA_CONTEXT] + when :xhtml5, :html4, :html5 + @host_defaults[:initial_contexts] = [XML_RDFA_CONTEXT, HTML_RDFA_CONTEXT] end - add_info(@doc, "version = #{@version}, host_language = #{@host_language}, library = #{@library}") - block.call(self) if block_given? 
end end ## @@ -411,11 +436,11 @@ # initialize the evaluation context with the appropriate base evaluation_context = EvaluationContext.new(base, @host_defaults) if @version != :"rdfa1.0" # Process default vocabularies - process_profile(root, @host_defaults[:profiles]) do |which, value| + load_initial_contexts(@host_defaults[:initial_contexts]) do |which, value| add_debug(root) { "parse_whole_document, #{which}: #{value.inspect}"} case which when :uri_mappings then evaluation_context.uri_mappings.merge!(value) when :term_mappings then evaluation_context.term_mappings.merge!(value) when :default_vocabulary then evaluation_context.default_vocabulary = value @@ -428,28 +453,28 @@ end # Parse and process URI mappings, Term mappings and a default vocabulary from @profile # # Yields each mapping - def process_profile(element, profiles) - profiles. + def load_initial_contexts(initial_contexts) + initial_contexts. map {|uri| uri(uri).normalize}. each do |uri| # Don't try to open ourselves! if base_uri == uri - add_debug(element) {"process_profile: skip recursive profile <#{uri}>"} + add_debug(root) {"load_initial_contexts: skip recursive profile <#{uri}>"} next end old_debug = RDF::RDFa.debug? begin - add_info(element, "process_profile: load <#{uri}>") + add_info(root, "load_initial_contexts: load <#{uri}>") RDF::RDFa.debug = false profile = Profile.find(uri) rescue Exception => e RDF::RDFa.debug = old_debug - add_error(element, e.message, RDF::RDFA.ProfileReferenceError) + add_error(root, e.message, RDF::RDFA.ProfileReferenceError) raise # In case we're not in strict mode, we need to be sure processing stops ensure RDF::RDFa.debug = old_debug end @@ -570,10 +595,11 @@ rel resource rev src typeof + value vocab ).each do |a| attrs[a.to_sym] = element.attributes[a].to_s.strip if element.attributes[a] end @@ -944,25 +970,27 @@ :validate => validate?, :canonicalize => canonicalize?) 
rescue ArgumentError => e add_error(element, e.message) end - elsif attrs[:datetime] + elsif element.name == 'time' + # HTML5 support # Lexically scan value and assign appropriate type, otherwise, leave untyped - v = element.attribute('datetime').to_s - datatype = %w(Date Time DateTime Duration).map {|t| RDF::Literal.const_get(t)}.detect do |dt| + v = (attrs[:datetime] || element.inner_text).to_s + datatype = %w(Date Time DateTime Year YearMonth Duration).map {|t| RDF::Literal.const_get(t)}.detect do |dt| v.match(dt::GRAMMAR) end || RDF::Literal - add_debug(element) {"[Step 11(1.1)] datetime literal: #{v.class}"} + add_debug(element) {"[Step 11(1.1)] <time> literal: #{datatype} #{v.inspect}"} datatype.new(v) elsif attrs[:content] # plain literal add_debug(element, "[Step 11(1.1)] plain literal (content)") RDF::Literal.new(attrs[:content], :language => language, :validate => validate?, :canonicalize => canonicalize?) - elsif attrs[:value] + elsif element.name.to_s == 'data' && attrs[:value] + # HTML5 support # plain literal add_debug(element, "[Step 11(1.1)] plain literal (value)") - RDF::Literal.new(attrs[:value], :validate => validate?, :canonicalize => canonicalize?) + RDF::Literal.new(attrs[:value], :language => language, :validate => validate?, :canonicalize => canonicalize?) elsif (attrs[:resource] || attrs[:href] || attrs[:src] || attrs[:data]) && !(attrs[:rel] || attrs[:rev]) && evaluation_context.incomplete_triples.empty? && @version != :"rdfa1.0" if attrs[:resource]