require 'treetop' Treetop.load(File.join(File.dirname(__FILE__), "n3_grammar")) module RdfContext class Parser; end class N3Parser < Parser N3_KEYWORDS = %w(a is of has keywords prefix base true false forSome forAny) # Parse N3 document from a string or input stream to closure or graph. # # If the parser is called with a block, triples are passed to the block rather # than added to the graph. # # @param [String] n3_str:: the Notation3/Turtle string # @param [String] uri:: the URI of the document # @option options [Graph] :graph (Graph.new) Graph to parse into, otherwise a new Graph instance is created # @option options [Array] :debug (nil) Array to place debug messages # @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise # @return [Graph] # @raise RdfException or subclass # # @author Patrick Sinclair (metade) # @author Gregg Kellogg def parse(stream, uri = nil, options = {}, &block) # :yields: triple super @callback = block parser = N3GrammerParser.new @doc = stream.respond_to?(:read) ? (stream.rewind; stream.read) : stream @default_ns = Namespace.new("#{uri}#", "") if uri add_debug("@default_ns", "#{@default_ns.inspect}") @graph.allow_n3 = true document = parser.parse(@doc) unless document puts parser.inspect if ::RdfContext::debug? reason = parser.failure_reason raise ParserException.new(reason) end process_statements(document) @graph end protected def namespace(uri, prefix) add_debug("namesspace", "'#{prefix}' <#{uri}>") uri = @default_ns.uri if uri == '#' @graph.bind(Namespace.new(uri, prefix)) add_debug("namesspace", "ns = #{@graph.nsbinding.inspect}") end def process_statements(document) document.elements.find_all do |e| s = e.elements.first add_debug(*s.info("process_statements")) if s.respond_to?(:subject) subject = process_expression(s.subject) add_debug(*s.info("process_statements(#{subject})")) properties = process_properties(s.property_list) properties.each do |p| predicate = process_verb(p.verb) add_debug(*p.info("process_statements(#{subject}, #{predicate})")) raise ParserException, %Q(Illegal statment: "#{predicate}" missing object) unless p.respond_to?(:object_list) objects = process_objects(p.object_list) objects.each do |object| if p.verb.respond_to?(:invert) add_triple("statement", object, predicate, subject) else add_triple("statement", subject, predicate, object) end end end elsif s.respond_to?(:anonnode) process_anonnode(s) elsif s.respond_to?(:pathitem) process_path(s) elsif s.respond_to?(:declaration) if s.respond_to?(:nprefix) add_debug(*s.info("process_statements(namespace)")) keyword_check("prefix") if s.text_value.index("prefix") == 0 uri = process_uri(s.explicituri.uri) namespace(uri, s.nprefix.text_value) elsif s.respond_to?(:base) add_debug(*s.info("process_statements(base)")) keyword_check("base") if s.text_value.index("base") == 0 # Base, set or update document URI uri = s.explicituri.uri.text_value @default_ns = Namespace.new(process_uri(uri), "") # Don't normalize add_debug("@default_ns", "#{@default_ns.inspect}") @uri = process_uri(uri) add_debug("@base", "#{@uri}") @uri elsif s.respond_to?(:keywords) add_debug(*s.info("process_statements(keywords)")) keyword_check("keywords") if s.text_value.index("keywords") == 0 @keywords = process_barename_csl(s.barename_csl) ||[] add_debug("@keywords", @keywords.inspect) if (@keywords & N3_KEYWORDS) != @keywords raise ParserException, "undefined keywords used: #{(@keywords - N3_KEYWORDS).to_sentence}" if @strict end end end end end def process_barename_csl(list) #add_debug(*list.info("process_barename_csl(list)")) res = [list.barename.text_value] if list.respond_to?(:barename) rest = process_barename_csl(list.barename_csl_tail) if list.respond_to?(:barename_csl_tail) rest ? res + rest : res end def process_anonnode(anonnode) add_debug(*anonnode.info("process_anonnode")) bnode = BNode.new if anonnode.respond_to?(:property_list) properties = process_properties(anonnode.property_list) properties.each do |p| predicate = process_verb(p.verb) add_debug(*p.info("anonnode[#{predicate}]")) objects = process_objects(p.object_list) objects.each { |object| add_triple("anonnode", bnode, predicate, object) } end elsif anonnode.respond_to?(:path_list) objects = process_objects(anonnode.path_list) last = objects.pop first_bnode = bnode objects.each do |object| add_triple("anonnode", first_bnode, RDF_NS.first, object) rest_bnode = BNode.new add_triple("anonnode", first_bnode, RDF_NS.rest, rest_bnode) first_bnode = rest_bnode end if last add_triple("anonnode", first_bnode, RDF_NS.first, last) add_triple("anonnode", first_bnode, RDF_NS.rest, RDF_NS.nil) else bnode = RDF_NS.nil end end bnode end def process_verb(verb) add_debug(*verb.info("process_verb")) case verb.text_value when "a" # If "a" is a keyword, then it's RDF_TYPE, otherwise it's expanded from the default namespace if @keywords.nil? || @keywords.include?("a") RDF_TYPE else build_uri("a") end when "@a" then RDF_TYPE when "=" then OWL_NS.sameAs when "=>" then LOG_NS.implies when "<=" then LOG_NS.implies when /^(@?is)\s+.*\s+(@?of)$/ keyword_check("is") if $1 == "is" keyword_check("of") if $2 == "of" process_expression(verb.prop) when /^has\s+/ keyword_check("has") process_expression(verb.prop) else if verb.respond_to?(:prop) process_expression(verb.prop) else process_expression(verb) end end end def process_expression(expression) if expression.respond_to?(:pathitem) && expression.respond_to?(:expression) add_debug(*expression.info("process_expression(pathitem && expression)")) process_path(expression) # Returns last object in chain elsif expression.respond_to?(:uri) add_debug(*expression.info("process_expression(uri)")) process_uri(expression.uri) elsif expression.respond_to?(:localname) add_debug(*expression.info("process_expression(localname)")) build_uri(expression) elsif expression.respond_to?(:anonnode) add_debug(*expression.info("process_expression(anonnode)")) process_anonnode(expression) elsif expression.respond_to?(:literal) add_debug(*expression.info("process_expression(literal)")) process_literal(expression) elsif expression.respond_to?(:numericliteral) add_debug(*expression.info("process_expression(numericliteral)")) process_numeric_literal(expression) elsif expression.respond_to?(:boolean) add_debug(*expression.info("process_expression(boolean)")) barename = expression.text_value.to_s if @keywords && !@keywords.include?(barename) build_uri(barename) else Literal.typed(barename.delete("@"), XSD_NS.boolean) end elsif expression.respond_to?(:barename) add_debug(*expression.info("process_expression(barename)")) barename = expression.text_value.to_s # Should only happen if @keywords is defined, and text_value is not a defined keyword case barename when "true" then Literal.typed("true", XSD_NS.boolean) when "false" then Literal.typed("false", XSD_NS.boolean) else # create URI using barename, unless it's in defined set, in which case it's an error raise ParserException, %Q(Keyword "#{barename}" used as expression) if @keywords && @keywords.include?(barename) build_uri(barename) end else add_debug(*expression.info("process_expression(else)")) build_uri(expression) end end # Process a path, such as: # :a.:b means [is :b of :a] # :a!:b means [is :b of :a] # :a^:b means [:b :a] # # Elements may be strug together, with the last element the verb applied to the previous expression: # :a.:b.:c means [is :c of [ is :b of :a]] # :a!:b^:c meands [:c [ is :b of :a]] def process_path(path) add_debug(*path.info("process_path")) object = process_expression(path.pathitem) # Create a list of direction/predicate pairs path_list = process_path_list(path.expression, path.respond_to?(:reverse)) #puts path_list.inspect # Now we should have the following # [ # [:forward, b] # [:forward, c] # ] path_list.each do |p| reverse, pred = p bnode = BNode.new if reverse add_triple("path(#{reverse})", bnode, pred, object) else add_triple("path(#{reverse})", object, pred, bnode) end object = bnode end object end # Returns array of [:forward/:reverse, element] pairs def process_path_list(path, reverse) add_debug(*path.info("process_path_list(#{reverse})")) if path.respond_to?(:pathitem) [[reverse, process_expression(path.pathitem)]] + process_path_list(path.expression, path.respond_to?(:reverse)) else [[reverse, process_expression(path)]] end end def process_uri(uri) uri = uri.text_value if uri.respond_to?(:text_value) URIRef.intern(uri.to_s.rdf_unescape, @uri, :normalize => false) end def process_properties(properties) add_debug(*properties.info("process_properties")) result = [] result << properties if properties.respond_to?(:verb) result << process_properties(properties.property_list) if properties.respond_to?(:property_list) result.flatten end def process_objects(objects) add_debug(*objects.info("process_objects")) result = [] if objects.respond_to?(:object) result << process_expression(objects.object) elsif objects.respond_to?(:pathitem) result << process_expression(objects) elsif objects.respond_to?(:expression) result << process_expression(objects.expression) result << process_objects(objects.path_list) if objects.respond_to?(:path_list) elsif !objects.text_value.empty? || objects.respond_to?(:nprefix) result << process_expression(objects) end result << process_objects(objects.object_list) if objects.respond_to?(:object_list) result.flatten end def process_literal(object) add_debug(*object.info("process_literal")) encoding, language = nil, nil string, type = object.elements unless type.elements.nil? #puts type.elements.inspect if (type.elements[0].text_value=='@') language = type.elements[1].text_value else encoding = process_expression(type.elements[1]) end end # Evaluate text_value to remove redundant escapes #puts string.elements[1].text_value.dump lit = Literal.n3_encoded(string.elements[1].text_value, language, encoding) raise ParserException, %(Typed literal has an invalid lexical value: #{encoding.to_n3} "#{lit.contents}") if @strict && !lit.valid? lit end def process_numeric_literal(object) add_debug(*object.info("process_numeric_literal")) Literal.typed(object.text_value, XSD_NS.send(object.numericliteral)) end def build_uri(expression) prefix = expression.respond_to?(:nprefix) ? expression.nprefix.text_value.to_s : "" localname = expression.localname.text_value if expression.respond_to?(:localname) localname ||= (expression.respond_to?(:text_value) ? expression.text_value : expression).to_s.sub(/^:/, "") localname = nil if localname.empty? # In N3/Turtle "_:" is not named if expression.respond_to?(:info) add_debug(*expression.info("build_uri(#{prefix.inspect}, #{localname.inspect})")) else add_debug("", "build_uri(#{prefix.inspect}, #{localname.inspect})") end uri = if @graph.nsbinding[prefix] @graph.nsbinding[prefix] + localname.to_s elsif prefix == '_' BNode.new(localname, @named_bnodes) elsif prefix == "rdf" # A special case RDF_NS + localname.to_s elsif prefix == "xsd" # A special case XSD_NS + localname.to_s else @default_ns ||= Namespace.new("#{@uri}#", "") @default_ns + localname.to_s end add_debug(*expression.info("build_uri: #{uri.inspect}")) if expression.respond_to?(:info) uri end # Is this an allowable keyword? def keyword_check(kw) unless (@keywords || %w(a is of has)).include?(kw) raise ParserException, "unqualified keyword '#{kw}' used without @keyword directive" if @strict end end end end module Treetop module Runtime class SyntaxNode # Brief information about a syntax node def info(ctx = "") m = self.singleton_methods(true) if m.empty? ["@#{self.interval.first}", "#{ctx}['#{self.text_value}']"] else ["@#{self.interval.first}", "#{ctx}[" + self.singleton_methods(true).map do |m| v = self.send(m) v = v.text_value if v.is_a?(SyntaxNode) "#{m}='#{v}'" end.join(", ") + "]"] end end end end end