lib/rdf/rdfa/reader.rb in rdf-rdfa-0.3.8 vs lib/rdf/rdfa/reader.rb in rdf-rdfa-0.3.9
- old
+ new
@@ -19,12 +19,13 @@
#
# [Nokogiri]: http://nokogiri.org/
#
# Based on processing rules described here:
# @see http://www.w3.org/TR/rdfa-syntax/#s_model RDFa 1.0
- # @see http://www.w3.org/TR/2011/WD-rdfa-core-20110331/ RDFa Core 1.1
- # @see http://www.w3.org/TR/2011/WD-xhtml-rdfa-20110331/ XHTML+RDFa 1.1
+ # @see http://www.w3.org/TR/2011/WD-rdfa-core-20111215/ RDFa Core 1.1
+ # @see http://www.w3.org/TR/2011/WD-xhtml-rdfa-20111215/ XHTML+RDFa 1.1
+ # @see http://www.w3.org/TR/2011/WD-rdfa-in-html-20110525/ HTML+RDFa 1.1
#
# @author [Gregg Kellogg](http://kellogg-assoc.com/)
class Reader < RDF::Reader
format Format
include Expansion
@@ -42,23 +43,44 @@
TERMorCURIEorAbsURIprop = {
:"rdfa1.0" => [:curie],
:"rdfa1.1" => [:term, :curie, :absuri],
}
+ # This expression matches an NCName as defined in
+ # [XML-NAMES](http://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName)
+ #
+ # @see http://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName
NC_REGEXP = Regexp.new(
%{^
(?!\\\\u0301) # ́ is a non-spacing acute accent.
# It is legal within an XML Name, but not as the first character.
( [a-zA-Z_]
- | \\\\u[0-9a-fA-F]
+ | \\\\u[0-9a-fA-F]{4}
)
+ ( [0-9a-zA-Z_\.-/]
+ | \\\\u([0-9a-fA-F]{4})
+ )*
+ $},
+ Regexp::EXTENDED)
+
+ # This expression matches a term as defined in
+ # [RDFA-CORE](http://www.w3.org/TR/2011/WD-rdfa-core-20111215/#s_terms)
+ #
+ # @see http://www.w3.org/TR/2011/WD-rdfa-core-20111215/#s_terms
+ TERM_REGEXP = Regexp.new(
+ %{^
+ (?!\\\\u0301) # ́ is a non-spacing acute accent.
+ # It is legal within an XML Name, but not as the first character.
+ ( [a-zA-Z_]
+ | \\\\u[0-9a-fA-F]{4}
+ )
( [0-9a-zA-Z_\.-]
| \\\\u([0-9a-fA-F]{4})
)*
$},
Regexp::EXTENDED)
-
+
# Host language
# @attr [:xml1, :xhtml1, :xhtml5, :html4, :html5, :svg]
attr_reader :host_language
# Version
@@ -251,25 +273,28 @@
when :rexml then REXML
end
self.extend(@implementation)
detect_host_language_version(input, options)
+
+ add_info(@doc, "version = #{@version}, host_language = #{@host_language}, library = #{@library}")
+
initialize_xml(input, options) rescue raise RDF::ReaderError.new($!.message)
if (root.nil? && validate?)
add_error(nil, "Empty document", RDF::RDFA.DocumentError)
raise RDF::ReaderError, "Empty Document"
end
add_warning(nil, "Syntax errors:\n#{doc_errors}", RDF::RDFA.DocumentError) if !doc_errors.empty? && validate?
# Section 4.2 RDFa Host Language Conformance
#
- # The Host Language may require the automatic inclusion of one or more default RDFa Profiles.
+ # The Host Language may require the automatic inclusion of one or more Initial Contexts
@host_defaults = {
- :vocabulary => nil,
- :uri_mappings => {},
- :profiles => [],
+ :vocabulary => nil,
+ :uri_mappings => {},
+ :initial_contexts => [],
}
if @version == :"rdfa1.0"
# Add default term mappings
@host_defaults[:term_mappings] = %w(
@@ -278,17 +303,17 @@
).inject({}) { |hash, term| hash[term] = RDF::XHV[term]; hash }
end
case @host_language
when :xml1, :svg
- @host_defaults[:profiles] = [XML_RDFA_PROFILE]
- when :xhtml1, :xhtml5, :html4, :html5
- @host_defaults[:profiles] = [XML_RDFA_PROFILE, XHTML_RDFA_PROFILE]
+ @host_defaults[:initial_contexts] = [XML_RDFA_CONTEXT]
+ when :xhtml1
+ @host_defaults[:initial_contexts] = [XML_RDFA_CONTEXT, XHTML_RDFA_CONTEXT]
+ when :xhtml5, :html4, :html5
+ @host_defaults[:initial_contexts] = [XML_RDFA_CONTEXT, HTML_RDFA_CONTEXT]
end
- add_info(@doc, "version = #{@version}, host_language = #{@host_language}, library = #{@library}")
-
block.call(self) if block_given?
end
end
##
@@ -411,11 +436,11 @@
# initialize the evaluation context with the appropriate base
evaluation_context = EvaluationContext.new(base, @host_defaults)
if @version != :"rdfa1.0"
# Process default vocabularies
- process_profile(root, @host_defaults[:profiles]) do |which, value|
+ load_initial_contexts(@host_defaults[:initial_contexts]) do |which, value|
add_debug(root) { "parse_whole_document, #{which}: #{value.inspect}"}
case which
when :uri_mappings then evaluation_context.uri_mappings.merge!(value)
when :term_mappings then evaluation_context.term_mappings.merge!(value)
when :default_vocabulary then evaluation_context.default_vocabulary = value
@@ -428,28 +453,28 @@
end
# Parse and process URI mappings, Term mappings and a default vocabulary from the given initial contexts
#
# Yields each mapping
- def process_profile(element, profiles)
- profiles.
+ def load_initial_contexts(initial_contexts)
+ initial_contexts.
map {|uri| uri(uri).normalize}.
each do |uri|
# Don't try to open ourselves!
if base_uri == uri
- add_debug(element) {"process_profile: skip recursive profile <#{uri}>"}
+ add_debug(root) {"load_initial_contexts: skip recursive profile <#{uri}>"}
next
end
old_debug = RDF::RDFa.debug?
begin
- add_info(element, "process_profile: load <#{uri}>")
+ add_info(root, "load_initial_contexts: load <#{uri}>")
RDF::RDFa.debug = false
profile = Profile.find(uri)
rescue Exception => e
RDF::RDFa.debug = old_debug
- add_error(element, e.message, RDF::RDFA.ProfileReferenceError)
+ add_error(root, e.message, RDF::RDFA.ProfileReferenceError)
raise # In case we're not in strict mode, we need to be sure processing stops
ensure
RDF::RDFa.debug = old_debug
end
@@ -570,10 +595,11 @@
rel
resource
rev
src
typeof
+ value
vocab
).each do |a|
attrs[a.to_sym] = element.attributes[a].to_s.strip if element.attributes[a]
end
@@ -944,25 +970,27 @@
:validate => validate?,
:canonicalize => canonicalize?)
rescue ArgumentError => e
add_error(element, e.message)
end
- elsif attrs[:datetime]
+ elsif element.name == 'time'
+ # HTML5 support
# Lexically scan value and assign appropriate type, otherwise, leave untyped
- v = element.attribute('datetime').to_s
- datatype = %w(Date Time DateTime Duration).map {|t| RDF::Literal.const_get(t)}.detect do |dt|
+ v = (attrs[:datetime] || element.inner_text).to_s
+ datatype = %w(Date Time DateTime Year YearMonth Duration).map {|t| RDF::Literal.const_get(t)}.detect do |dt|
v.match(dt::GRAMMAR)
end || RDF::Literal
- add_debug(element) {"[Step 11(1.1)] datetime literal: #{v.class}"}
+ add_debug(element) {"[Step 11(1.1)] <time> literal: #{datatype} #{v.inspect}"}
datatype.new(v)
elsif attrs[:content]
# plain literal
add_debug(element, "[Step 11(1.1)] plain literal (content)")
RDF::Literal.new(attrs[:content], :language => language, :validate => validate?, :canonicalize => canonicalize?)
- elsif attrs[:value]
+ elsif element.name.to_s == 'data' && attrs[:value]
+ # HTML5 support
# plain literal
add_debug(element, "[Step 11(1.1)] plain literal (value)")
- RDF::Literal.new(attrs[:value], :validate => validate?, :canonicalize => canonicalize?)
+ RDF::Literal.new(attrs[:value], :language => language, :validate => validate?, :canonicalize => canonicalize?)
elsif (attrs[:resource] || attrs[:href] || attrs[:src] || attrs[:data]) &&
!(attrs[:rel] || attrs[:rev]) &&
evaluation_context.incomplete_triples.empty? &&
@version != :"rdfa1.0"
if attrs[:resource]