require 'treetop'

if defined?(::Encoding)
  # Ruby 1.9+: load the full grammar.
  Treetop.load(File.join(File.dirname(__FILE__), "reader", "n3_grammar"))
else
  # Ruby 1.8: load the reduced grammar, which doesn't include U00010000-\U000effff.
  Treetop.load(File.join(File.dirname(__FILE__), "reader", "n3_grammar_18"))
end

module RDF::N3
  ##
  # A Notation-3/Turtle parser in Ruby
  #
  # The whole input is read in {#initialize}; parsing happens lazily each time
  # {#each_statement} is called.
  #
  # @author [Gregg Kellogg](http://kellogg-assoc.com/)
  class Reader < RDF::Reader
    format Format

    # Barenames that may be declared with the @keywords directive.
    N3_KEYWORDS = %w(a is of has keywords prefix base true false forSome forAny).freeze

    # Pattern for an NCName-like token where non-ASCII characters appear as \uXXXX escapes.
    NC_REGEXP = Regexp.new(
      %{^
        (?!\\\\u0301)             # U+0301 is a non-spacing acute accent.
                                  # It is legal within an XML Name, but not as the first character.
        (  [a-zA-Z_]
         | \\\\u[0-9a-fA-F]
        )
        (  [0-9a-zA-Z_\.-]
         | \\\\u([0-9a-fA-F]{4})
        )*
      $},
      Regexp::EXTENDED)

    ##
    # Initializes the N3 reader instance.
    #
    # The input is read completely here (after being rewound when it is an IO-like
    # object), so {#rewind} and {#close} are no-ops.
    #
    # @param  [IO, File, String] input
    # @option options [Array] :debug Array to place debug messages
    # @option options [Boolean] :strict Raise Error if true, continue with lax parsing, otherwise
    # @option options [String, #to_s] :base_uri (nil) Base URI to use for relative URIs.
    # @return [reader]
    # @yield  [reader]
    # @yieldparam [Reader] reader
    # @raise [RDF::ReaderError] presumably only when _strict_ -- nothing in this method raises it directly
    def initialize(input = $stdin, options = {}, &block)
      super do
        @debug = options[:debug]
        @strict = options[:strict]
        @uri_mappings = {}
        @uri = uri(options[:base_uri], nil, false)
        @doc = input.respond_to?(:read) ? (input.rewind; input.read) : input
        @default_ns = uri("#{options[:base_uri]}#", nil, false) if @uri
        add_debug("@default_ns", "#{@default_ns.inspect}")

        block.call(self) if block_given?
      end
    end

    # No need to rewind, as parsing is done in initialize
    def rewind; end

    # Document closed when read in initialize
    def close; end

    ##
    # Iterates the given block for each RDF statement in the input.
    #
    # @yield  [statement]
    # @yieldparam [RDF::Statement] statement
    # @return [void]
    # @raise [RDF::ReaderError] if the grammar fails to parse the document
    def each_statement(&block)
      @callback = block

      parser = N3GrammerParser.new
      document = parser.parse(@doc)
      unless document
        puts parser.inspect if ::RDF::N3::debug?
        reason = parser.failure_reason
        raise RDF::ReaderError, reason
      end

      process_statements(document)
    end

    ##
    # Iterates the given block for each RDF triple in the input.
    #
    # @yield  [subject, predicate, object]
    # @yieldparam [RDF::Resource] subject
    # @yieldparam [RDF::URI]      predicate
    # @yieldparam [RDF::Value]    object
    # @return [void]
    def each_triple(&block)
      each_statement do |statement|
        block.call(*statement.to_triple)
      end
    end

    private

    # Keep track of allocated BNodes, so that the same label always yields the same node.
    #
    # @param [String, nil] value the BNode label
    # @return [RDF::Node]
    def bnode(value = nil)
      @bnode_cache ||= {}
      @bnode_cache[value.to_s] ||= RDF::Node.new(value)
    end

    # Add debug event to debug array, if specified
    #
    # @param [Object] node context shown in front of the message (typically a position such as "@12")
    # @param [String] message
    def add_debug(node, message)
      puts "#{node}: #{message}" if ::RDF::N3::debug?
      @debug << "#{node}: #{message}" if @debug.is_a?(Array)
    end

    # Build a statement and hand it to the callback registered by {#each_statement}.
    # The object can be a literal, URI or bnode.
    #
    # @param [Object] node context string used for the debug message
    # @param [URI, Node] subject the subject of the statement
    # @param [URI] predicate the predicate of the statement
    # @param [URI, Node, Literal] object the object of the statement
    # @return [Object] whatever the callback returns
    def add_triple(node, subject, predicate, object)
      statement = RDF::Statement.new(subject, predicate, object)
      add_debug(node, "statement: #{statement}")
      @callback.call(statement)
    end

    # Bind a prefix to a namespace URI. A namespace of "#" is bound to the
    # current default namespace.
    #
    # @param [#to_s] uri namespace URI
    # @param [String] prefix
    # @return [RDF::URI] the interned namespace URI
    def namespace(uri, prefix)
      uri = uri.to_s
      if uri == "#"
        uri = @default_ns
      end
      add_debug("namesspace", "'#{prefix}' <#{uri}>")
      @uri_mappings[prefix] = RDF::URI.intern(uri)
    end

    # Walk the top-level elements of the parsed document, emitting triples and
    # applying declarations (@prefix, @base, @keywords) in document order.
    #
    # @param [Treetop::Runtime::SyntaxNode] document
    # @return [void]
    def process_statements(document)
      document.elements.each do |e|
        s = e.elements.first
        add_debug(*s.info("process_statements"))

        if s.respond_to?(:subject)
          process_simple_statement(s)
        elsif s.respond_to?(:anonnode)
          process_anonnode(s)
        elsif s.respond_to?(:pathitem)
          process_path(s)
        elsif s.respond_to?(:declaration)
          process_declaration(s)
        end
      end
    end

    # Emit one triple per (property, object) pair of a "subject property-list" statement.
    #
    # @param [Treetop::Runtime::SyntaxNode] s node responding to +subject+ and +property_list+
    # @raise [RDF::ReaderError] if a property has no object list
    def process_simple_statement(s)
      subject = process_expression(s.subject)
      add_debug(*s.info("process_statements(#{subject})"))

      process_properties(s.property_list).each do |p|
        predicate = process_verb(p.verb)
        add_debug(*p.info("process_statements(#{subject}, #{predicate})"))
        raise RDF::ReaderError, %Q(Illegal statment: "#{predicate}" missing object) unless p.respond_to?(:object_list)

        process_objects(p.object_list).each do |object|
          # An inverted verb swaps the roles of subject and object.
          if p.verb.respond_to?(:invert)
            add_triple("statement", object, predicate, subject)
          else
            add_triple("statement", subject, predicate, object)
          end
        end
      end
    end

    # Apply a @prefix, @base or @keywords declaration.
    #
    # @param [Treetop::Runtime::SyntaxNode] s node responding to +declaration+
    # @raise [RDF::ReaderError] in strict mode, when @keywords lists an unknown keyword
    def process_declaration(s)
      if s.respond_to?(:nprefix)
        add_debug(*s.info("process_statements(namespace)"))
        # A bare "prefix" (without "@") is only legal when it is an active keyword.
        keyword_check("prefix") if s.text_value.index("prefix") == 0
        ns_uri = process_uri(s.explicituri.uri)
        namespace(ns_uri, s.nprefix.text_value)
      elsif s.respond_to?(:base)
        add_debug(*s.info("process_statements(base)"))
        keyword_check("base") if s.text_value.index("base") == 0
        # Base, set or update document URI
        base_text = s.explicituri.uri.text_value
        @uri = process_uri(base_text)

        # The empty prefix "" is by default bound to "#" -- the local namespace of the file.
        # The parser behaves as though there were a
        #   @prefix : <#>.
        # just before the file.
        # This means that <#foo> can be written :foo and using @keywords one can reduce that to foo.
        @default_ns = base_text.match(/[\/\#]$/) ? @uri : process_uri("#{base_text}#")
        add_debug("@default_ns", "#{@default_ns.inspect}")
        add_debug("@base", "#{@uri}")
      elsif s.respond_to?(:keywords)
        add_debug(*s.info("process_statements(keywords)"))
        keyword_check("keywords") if s.text_value.index("keywords") == 0
        @keywords = process_barename_csl(s.barename_csl) || []
        add_debug("@keywords", @keywords.inspect)

        # Compare by set difference so that a keyword listed twice is not
        # mistaken for an undefined one.
        undefined = @keywords - N3_KEYWORDS
        if @strict && !undefined.empty?
          # Array#to_sentence is not core Ruby; use it only when something provides it.
          listing = undefined.respond_to?(:to_sentence) ? undefined.to_sentence : undefined.join(", ")
          raise RDF::ReaderError, "undefined keywords used: #{listing}"
        end
      end
    end

    # Flatten a comma-separated barename list into an array of strings.
    #
    # @param [Treetop::Runtime::SyntaxNode] list
    # @return [Array<String>, nil] nil when the node carries no barename
    def process_barename_csl(list)
      #add_debug(*list.info("process_barename_csl(list)"))
      res = [list.barename.text_value] if list.respond_to?(:barename)
      rest = process_barename_csl(list.barename_csl_tail) if list.respond_to?(:barename_csl_tail)
      rest ? res + rest : res
    end

    # Process an anonymous node: either a property list attached to a fresh
    # blank node, or a path list emitted as an rdf:first/rdf:rest chain.
    #
    # @param [Treetop::Runtime::SyntaxNode] anonnode
    # @return [RDF::Node, RDF::URI] the blank node, or rdf:nil for an empty path list
    def process_anonnode(anonnode)
      add_debug(*anonnode.info("process_anonnode"))
      head = RDF::Node.new

      if anonnode.respond_to?(:property_list)
        process_properties(anonnode.property_list).each do |p|
          predicate = process_verb(p.verb)
          add_debug(*p.info("anonnode[#{predicate}]"))
          objects = process_objects(p.object_list)
          objects.each { |object| add_triple("anonnode", head, predicate, object) }
        end
      elsif anonnode.respond_to?(:path_list)
        objects = process_objects(anonnode.path_list)
        last = objects.pop
        cell = head
        objects.each do |object|
          add_triple("anonnode", cell, RDF.first, object)
          next_cell = RDF::Node.new
          add_triple("anonnode", cell, RDF.rest, next_cell)
          cell = next_cell
        end
        if last
          # The final cell is terminated with rdf:nil.
          add_triple("anonnode", cell, RDF.first, last)
          add_triple("anonnode", cell, RDF.rest, RDF.nil)
        else
          # An empty list is rdf:nil itself.
          head = RDF.nil
        end
      end
      head
    end

    # Resolve a verb node to its predicate.
    #
    # @param [Treetop::Runtime::SyntaxNode] verb
    # @return [RDF::URI, Object] the predicate (result of {#process_expression} for non-keyword verbs)
    def process_verb(verb)
      add_debug(*verb.info("process_verb"))
      case verb.text_value
      when "a"
        # If "a" is a keyword, then it's rdf:type, otherwise it's expanded from the default namespace
        if @keywords.nil? || @keywords.include?("a")
          RDF.type
        else
          build_uri("a")
        end
      when "@a" then RDF.type
      when "=" then RDF::OWL.sameAs
      when "=>" then RDF::LOG.implies
      # NOTE(review): "<=" yields the same predicate; the subject/object swap is
      # presumably signalled by the grammar through +invert+ -- confirm against the grammar.
      when "<=" then RDF::LOG.implies
      when /^(@?is)\s+.*\s+(@?of)$/
        match = Regexp.last_match
        keyword_check("is") if match[1] == "is"
        keyword_check("of") if match[2] == "of"
        process_expression(verb.prop)
      when /^has\s+/
        keyword_check("has")
        process_expression(verb.prop)
      else
        if verb.respond_to?(:prop)
          process_expression(verb.prop)
        else
          process_expression(verb)
        end
      end
    end

    # Dispatch on the shape of an expression node and return the corresponding RDF value.
    #
    # @param [Treetop::Runtime::SyntaxNode] expression
    # @return [RDF::Value]
    # @raise [RDF::ReaderError] if a declared keyword is used as a barename expression
    def process_expression(expression)
      if expression.respond_to?(:pathitem) && expression.respond_to?(:expression)
        add_debug(*expression.info("process_expression(pathitem && expression)"))
        process_path(expression)  # Returns last object in chain
      elsif expression.respond_to?(:uri)
        add_debug(*expression.info("process_expression(uri)"))
        process_uri(expression.uri)
      elsif expression.respond_to?(:localname)
        add_debug(*expression.info("process_expression(localname)"))
        build_uri(expression)
      elsif expression.respond_to?(:anonnode)
        add_debug(*expression.info("process_expression(anonnode)"))
        process_anonnode(expression)
      elsif expression.respond_to?(:literal)
        add_debug(*expression.info("process_expression(literal)"))
        process_literal(expression)
      elsif expression.respond_to?(:numericliteral)
        add_debug(*expression.info("process_expression(numericliteral)"))
        process_numeric_literal(expression)
      elsif expression.respond_to?(:boolean)
        add_debug(*expression.info("process_expression(boolean)"))
        barename = expression.text_value.to_s
        if @keywords && !@keywords.include?(barename)
          # Keywords were declared and this one is not among them: treat it as a name.
          build_uri(barename)
        else
          RDF::Literal.new(barename.delete("@"), :datatype => RDF::XSD.boolean, :validate => @strict, :canonicalize => true)
        end
      elsif expression.respond_to?(:barename)
        add_debug(*expression.info("process_expression(barename)"))
        barename = expression.text_value.to_s

        # Should only happen if @keywords is defined, and text_value is not a defined keyword
        case barename
        when "true"   then RDF::Literal.new("true", :datatype => RDF::XSD.boolean)
        when "false"  then RDF::Literal.new("false", :datatype => RDF::XSD.boolean)
        else
          # create URI using barename, unless it's in defined set, in which case it's an error
          raise RDF::ReaderError, %Q(Keyword "#{barename}" used as expression) if @keywords && @keywords.include?(barename)
          build_uri(barename)
        end
      else
        add_debug(*expression.info("process_expression(else)"))
        build_uri(expression)
      end
    end

    # Process a path, such as:
    #   :a.:b means [is :b of :a]
    #   :a!:b means [is :b of :a]
    #   :a^:b means [:b :a]
    #
    # Elements may be strung together, with the last element the verb applied to the previous expression:
    #   :a.:b.:c means [is :c of [ is :b of :a]]
    #   :a!:b^:c means [:c [ is :b of :a]]
    #
    # @param [Treetop::Runtime::SyntaxNode] path
    # @return [RDF::Value] the last object in the chain
    def process_path(path)
      add_debug(*path.info("process_path"))

      object = process_expression(path.pathitem)

      # Create a list of direction/predicate pairs
      path_list = process_path_list(path.expression, path.respond_to?(:reverse))
      #puts path_list.inspect
      # Now we should have the following
      # [
      #   [:forward, b]
      #   [:forward, c]
      # ]
      path_list.each do |reverse, pred|
        step = RDF::Node.new
        if reverse
          add_triple("path(#{reverse})", step, pred, object)
        else
          add_triple("path(#{reverse})", object, pred, step)
        end
        object = step
      end
      object
    end

    # Returns array of [reverse, element] pairs, where +reverse+ is the truthy/falsy
    # direction flag passed in for the current step.
    #
    # @param [Treetop::Runtime::SyntaxNode] path
    # @param [Boolean] reverse direction of the current step
    # @return [Array<Array>]
    def process_path_list(path, reverse)
      add_debug(*path.info("process_path_list(#{reverse})"))
      if path.respond_to?(:pathitem)
        [[reverse, process_expression(path.pathitem)]] + process_path_list(path.expression, path.respond_to?(:reverse))
      else
        [[reverse, process_expression(path)]]
      end
    end

    # Unescape a URI reference and resolve it against the current base URI.
    #
    # @param [Treetop::Runtime::SyntaxNode, String] uri
    # @return [RDF::URI]
    def process_uri(uri)
      uri = uri.text_value if uri.respond_to?(:text_value)
      uri(@uri, RDF::NTriples.unescape(uri))
    end

    # Collect every node of a (nested) property list that carries a verb.
    #
    # @param [Treetop::Runtime::SyntaxNode] properties
    # @return [Array<Treetop::Runtime::SyntaxNode>]
    def process_properties(properties)
      add_debug(*properties.info("process_properties"))
      result = []
      result << properties if properties.respond_to?(:verb)
      result << process_properties(properties.property_list) if properties.respond_to?(:property_list)
      result.flatten
    end

    # Evaluate every object of a (nested) object or path list.
    #
    # @param [Treetop::Runtime::SyntaxNode] objects
    # @return [Array<RDF::Value>]
    def process_objects(objects)
      add_debug(*objects.info("process_objects"))
      result = []
      if objects.respond_to?(:object)
        result << process_expression(objects.object)
      elsif objects.respond_to?(:pathitem)
        result << process_expression(objects)
      elsif objects.respond_to?(:expression)
        result << process_expression(objects.expression)
        result << process_objects(objects.path_list) if objects.respond_to?(:path_list)
      elsif !objects.text_value.empty? || objects.respond_to?(:nprefix)
        result << process_expression(objects)
      end
      result << process_objects(objects.object_list) if objects.respond_to?(:object_list)
      result.flatten
    end

    # Build a literal from a string node and its optional language tag or datatype.
    #
    # @param [Treetop::Runtime::SyntaxNode] object
    # @return [RDF::Literal]
    def process_literal(object)
      add_debug(*object.info("process_literal"))
      datatype, language = nil, nil
      string, type = object.elements

      unless type.elements.nil?
        #puts type.elements.inspect
        # "@" introduces a language tag; anything else is taken as a datatype expression.
        if (type.elements[0].text_value == '@')
          language = type.elements[1].text_value
        else
          datatype = process_expression(type.elements[1])
        end
      end

      # Evaluate text_value to remove redundant escapes
      #puts string.elements[1].text_value.dump
      RDF::Literal.new(RDF::NTriples.unescape(string.elements[1].text_value), :language => language, :validate => @strict, :datatype => datatype, :canonicalize => true)
    end

    # Build a numeric literal whose XSD datatype is named by the node's +numericliteral+.
    #
    # @param [Treetop::Runtime::SyntaxNode] object
    # @return [RDF::Literal]
    def process_numeric_literal(object)
      add_debug(*object.info("process_numeric_literal"))

      RDF::Literal.new(RDF::NTriples.unescape(object.text_value), :datatype => RDF::XSD[object.numericliteral], :validate => @strict, :canonicalize => true)
    end

    # Expand a QName, barename or BNode label into an RDF term.
    #
    # Resolution order: declared prefix, "_" (blank node), the built-in "rdf"
    # and "xsd" prefixes, and finally the default namespace.
    #
    # @param [Treetop::Runtime::SyntaxNode, String] expression
    # @return [RDF::URI, RDF::Node]
    def build_uri(expression)
      prefix = expression.respond_to?(:nprefix) ? expression.nprefix.text_value.to_s : ""
      localname = expression.localname.text_value if expression.respond_to?(:localname)
      localname ||= (expression.respond_to?(:text_value) ? expression.text_value : expression).to_s.sub(/^:/, "")
      localname = nil if localname.empty? # In N3/Turtle "_:" is not named

      # Plain strings have no position information to report.
      traceable = expression.respond_to?(:info)

      if traceable
        add_debug(*expression.info("build_uri(#{prefix.inspect}, #{localname.inspect})"))
      else
        add_debug("", "build_uri(#{prefix.inspect}, #{localname.inspect})")
      end

      resolved = if @uri_mappings[prefix]
        add_debug(*expression.info("build_uri: (ns): #{@uri_mappings[prefix]}, #{localname}")) if traceable
        ns(prefix, localname.to_s)
      elsif prefix == '_'
        add_debug(*expression.info("build_uri: (bnode)")) if traceable
        bnode(localname)
      elsif prefix == "rdf"
        add_debug(*expression.info("build_uri: (rdf)")) if traceable
        # A special case
        RDF::RDF[localname.to_s]
      elsif prefix == "xsd"
        add_debug(*expression.info("build_uri: (xsd)")) if traceable
        # A special case
        RDF::XSD[localname.to_s]
      else
        add_debug(*expression.info("build_uri: (default_ns)")) if traceable
        @default_ns ||= uri("#{@uri}#", nil)
        ns(nil, localname.to_s)
      end
      add_debug(*expression.info("build_uri: #{resolved.inspect}")) if traceable
      resolved
    end

    # Is this an allowable keyword?
    #
    # @param [String] kw
    # @raise [RDF::ReaderError] in strict mode, if +kw+ is not an active keyword
    def keyword_check(kw)
      unless (@keywords || %w(a is of has)).include?(kw)
        raise RDF::ReaderError, "unqualified keyword '#{kw}' used without @keyword directive" if @strict
      end
    end

    # Create URIs
    #
    # @param [#to_s] value base value, interned as an RDF::URI
    # @param [#to_s, nil] append reference joined onto +value+ when given
    # @param [Boolean] normalize accepted for compatibility; not used by this method
    # @return [RDF::URI]
    def uri(value, append, normalize = false)
      value = RDF::URI.intern(value)
      value = value.join(append) if append
      value
    end

    # Concatenate a namespace and a local name into an interned URI.
    #
    # @param [String, nil] prefix declared prefix, or nil for the default namespace
    # @param [#to_s] suffix local name
    # @return [RDF::URI]
    def ns(prefix, suffix)
      prefix = prefix.nil? ? @default_ns.to_s : @uri_mappings[prefix].to_s
      # Avoid a doubled "#" when the namespace already contains one.
      suffix = suffix.to_s.sub(/^\#/, "") if prefix.index("#")
      add_debug("ns", "prefix: '#{prefix}', suffix: '#{suffix}'")
      RDF::URI.intern(prefix + suffix)
    end
  end
end

module Treetop
  module Runtime
    class SyntaxNode
      # Brief information about a syntax node
      #
      # @param [String] ctx label put in front of the node description
      # @return [Array(String, String)] "@<start offset>" and a description made of
      #   either the node text or its singleton accessors and their values
      def info(ctx = "")
        m = self.singleton_methods(true)
        if m.empty?
          ["@#{self.interval.first}", "#{ctx}['#{self.text_value}']"]
        else
          described = m.map do |mm|
            v = self.send(mm)
            v = v.text_value if v.is_a?(SyntaxNode)
            "#{mm}='#{v}'"
          end
          ["@#{self.interval.first}", "#{ctx}[" + described.join(", ") + "]"]
        end
      end
    end
  end
end