require 'open-uri'

module JSON::LD
  ##
  # A JSON-LD parser in Ruby.
  #
  # @see http://json-ld.org/spec/ED/20110507/
  # @author [Gregg Kellogg](http://greggkellogg.net/)
  class Reader < RDF::Reader
    format Format

    ##
    # The graph constructed when parsing.
    #
    # NOTE(review): `@graph` is never assigned anywhere in this file, so this
    # reader returns nil unless it is set elsewhere -- confirm before relying on it.
    #
    # @return [RDF::Graph]
    attr_reader :graph

    ##
    # Context
    #
    # The `@context` keyword is used to change how the JSON-LD processor evaluates key-value pairs. In this
    # case, it was used to map one string (`'myvocab'`) to another string, which is interpreted as an IRI. In the
    # example above, the `myvocab` string is replaced with "http://example.org/myvocab#" when it is detected. In
    # the example above, `"myvocab:personality"` would expand to "http://example.org/myvocab#personality".
    #
    # This mechanism is a short-hand for RDF, called a `CURIE`, and provides developers an unambiguous way to
    # map any JSON value to RDF.
    #
    # @private
    class EvaluationContext # :nodoc:
      # The base.
      #
      # The `@base` string is a special keyword that states that any relative IRI MUST be appended to the string
      # specified by `@base`.
      #
      # @attr [RDF::URI]
      attr_accessor :base

      # A list of current, in-scope URI mappings.
      #
      # @attr [Hash{String => String}]
      attr_accessor :mappings

      # The default vocabulary
      #
      # A value to use as the prefix URI when a term is used.
      # This specification does not define an initial setting for the default vocabulary.
      # Host Languages may define an initial setting.
      #
      # @attr [String]
      attr_accessor :vocab

      # Type coercion
      #
      # The @coerce keyword is used to specify type coercion rules for the data. For each key in the map, the
      # key is the type to be coerced to and the value is the vocabulary term to be coerced. Type coercion for
      # the key `@iri` asserts that all vocabulary terms listed should undergo coercion to an IRI,
      # including `@base` processing for relative IRIs and CURIE processing for compact URI Expressions like
      # `foaf:homepage`.
      #
      # As the value may be an array, this is maintained as a reverse mapping of `property` => `type`.
      #
      # @attr [Hash{String => String}]
      attr_reader :coerce

      # List coercion
      #
      # The @list keyword is used to specify that properties having an array value are to be treated
      # as an ordered list, rather than a normal unordered list
      # @attr [Array<String>]
      attr_reader :list

      ##
      # Create new evaluation context
      # @yield [ec]
      # @yieldparam [EvaluationContext]
      # @return [EvaluationContext]
      def initialize
        @base = nil
        @mappings = {}
        @vocab = nil
        @coerce = {}
        @list = []
        yield(self) if block_given?
      end

      ##
      # Give a `dup`/`clone` its own containers.
      #
      # A local `@context` is merged into a duplicate of the active context; without
      # copying the containers, the duplicate would share (and mutate) the parent's
      # mappings, coercion rules and list properties.
      #
      # @param [EvaluationContext] source the context being copied
      def initialize_copy(source)
        super
        @mappings = source.mappings.dup
        @coerce = source.coerce.dup
        @list = source.list.dup
      end

      # Human-readable summary of the context, used in debug output.
      #
      # @return [String]
      def inspect
        v = %w([EvaluationContext) + %w(base vocab).map {|a| "#{a}='#{self.send(a).inspect}'"}
        v << "mappings[#{mappings.keys.length}]=#{mappings}"
        v << "coerce[#{coerce.keys.length}]=#{coerce}"
        v << "list[#{list.length}]=#{list}"
        v.join(", ") + "]"
      end
    end

    ##
    # Initializes the JSON-LD reader instance.
    #
    # When the document cannot be parsed and validation is off, parsing continues
    # with an empty JSON object instead of raising.
    #
    # @param  [IO, File, String]       input
    # @param  [Hash{Symbol => Object}] options
    #   any additional options (see `RDF::Reader#initialize`)
    # @yield  [reader] `self`
    # @yieldparam  [RDF::Reader] reader
    # @yieldreturn [void] ignored
    # @raise [RDF::ReaderError] if the JSON document cannot be loaded and validation is enabled
    def initialize(input = $stdin, options = {}, &block)
      super do
        @base_uri = uri(options[:base_uri]) if options[:base_uri]
        begin
          # NOTE(review): depending on the json library version, JSON.load may honour
          # `json_class` additions; consider JSON.parse for untrusted input -- confirm.
          @doc = JSON.load(input)
        rescue JSON::ParserError => e
          raise RDF::ReaderError, "Failed to parse input document: #{e.message}" if validate?
          @doc = JSON.parse("{}")
        end

        if block_given?
          case block.arity
          when 0 then instance_eval(&block)
          else block.call(self)
          end
        end
      end
    end

    ##
    # Walks the parsed document and yields each generated statement to the block.
    #
    # @private
    # @see   RDF::Reader#each_statement
    def each_statement(&block)
      @callback = block

      # initialize the evaluation context with the appropriate base
      ec = EvaluationContext.new do |e|
        e.base = @base_uri if @base_uri
        parse_context(e, DEFAULT_CONTEXT)
      end

      traverse("", @doc, nil, nil, ec)
    end

    ##
    # Yields each statement as a subject/predicate/object triple.
    #
    # @private
    # @see   RDF::Reader#each_triple
    def each_triple(&block)
      each_statement do |statement|
        block.call(*statement.to_triple)
      end
    end

    private

    ##
    # Recursively process a JSON element, emitting statements through the
    # callback stored by {#each_statement}.
    #
    # @param [String] path
    #   location within JSON hash
    # @param [Hash, Array, String] element
    #   The current JSON element being processed
    # @param [RDF::URI] subject
    #   Inherited subject
    # @param [RDF::URI] property
    #   Inherited property
    # @param [EvaluationContext] ec
    #   The active context
    # @raise [RDF::ReaderError] if the element is of an unsupported type
    def traverse(path, element, subject, property, ec)
      add_debug(path) {"traverse: s=#{subject.inspect}, p=#{property.inspect}, e=#{ec.inspect}"}
      object = nil

      case element
      when Hash
        # 2) ... For each key-value
        # pair in the associative array, using the newly created processor state do the
        # following:

        # 2.1) If a @context keyword is found, the processor merges each key-value pair in
        # the local context into the active context ...
        if element['@context']
          # Merge context into a copy, so the enclosing scope keeps its own context
          ec = parse_context(ec.dup, element['@context'])
          prefixes.merge!(ec.mappings) # Update parsed prefixes
        end

        # 2.2) Create a new associative array by mapping the keys from the current associative array ...
        new_element = {}
        element.each do |k, v|
          # NOTE(review): a mapping that refers back to itself would loop forever here.
          k = ec.mappings[k.to_s] while ec.mappings.has_key?(k.to_s)
          new_element[k] = v
        end
        unless element == new_element
          add_debug(path) {"traverse: keys after map: #{new_element.keys.inspect}"}
          element = new_element
        end

        # Other shortcuts to allow use of this method for terminal associative arrays
        if element['@iri'].is_a?(String)
          # 2.3 Return the IRI found from the value
          object = expand_term(element['@iri'], ec.base, ec)
          add_triple(path, subject, property, object) if subject && property
          return
        elsif element['@literal']
          # 2.4
          literal_opts = {}
          literal_opts[:datatype] = expand_term(element['@datatype'], ec.vocab.to_s, ec) if element['@datatype']
          literal_opts[:language] = element['@language'].to_sym if element['@language']
          object = RDF::Literal.new(element['@literal'], literal_opts)
          add_triple(path, subject, property, object) if subject && property
          return
        elsif element['@list']
          # 2.4a (Lists)
          parse_list("#{path}[#{'@list'}]", element['@list'], subject, property, ec)
          return
        elsif element['@subject'].is_a?(String)
          # 2.5 Subject
          # 2.5.1 Set active object (subject)
          active_subject = expand_term(element['@subject'], ec.base, ec)
        elsif element['@subject']
          # 2.5.2 Recursively process hash or Array values
          # NOTE(review): active_subject is left nil in this branch, so the code
          # below runs with a nil subject -- confirm this is intended.
          traverse("#{path}[#{'@subject'}]", element['@subject'], subject, property, ec)
        else
          # 2.6) Generate a blank node identifier and set it as the active subject.
          active_subject = RDF::Node.new
        end

        add_triple(path, subject, property, active_subject) if subject && property
        subject = active_subject

        element.each do |key, value|
          # 2.7) If a key that is not @context, @subject, or @type, set the active property by
          # performing Property Processing on the key.
          property = case key
          when '@type' then '@type'
          when /^@/ then next
          else expand_term(key, ec.vocab, ec)
          end

          # 2.7.3
          if ec.list.include?(property.to_s) && value.is_a?(Array)
            # 2.7.3.1 (Lists) If the active property is the target of a @list coercion, and the value is an array,
            #         process the value as a list starting at Step 3a.
            parse_list("#{path}[#{key}]", value, subject, property, ec)
          else
            traverse("#{path}[#{key}]", value, subject, property, ec)
          end
        end
      when Array
        # 3) If a regular array is detected, process each value in the array by doing the following:
        element.each_with_index do |v, i|
          traverse("#{path}[#{i}]", v, subject, property, ec)
        end
      when String
        # Perform coercion of the value, or generate a literal
        add_debug(path) do
          "traverse(#{element}): coerce?(#{property.inspect}) == #{ec.coerce[property.to_s].inspect}, " +
          "ec=#{ec.coerce.inspect}"
        end
        object = if ec.coerce[property.to_s] == '@iri'
          expand_term(element, ec.base, ec)
        elsif ec.coerce[property.to_s]
          RDF::Literal.new(element, :datatype => ec.coerce[property.to_s])
        else
          RDF::Literal.new(element)
        end
        property = RDF.type if property == '@type'
        add_triple(path, subject, property, object) if subject && property
      when Float
        object = RDF::Literal::Double.new(element)
        add_debug(path) {"traverse(#{element}): native: #{object.inspect}"}
        add_triple(path, subject, property, object) if subject && property
      when Integer
        # Integer (rather than the removed Fixnum) also covers arbitrarily large values
        object = RDF::Literal.new(element)
        add_debug(path) {"traverse(#{element}): native: #{object.inspect}"}
        add_triple(path, subject, property, object) if subject && property
      when TrueClass, FalseClass
        object = RDF::Literal::Boolean.new(element)
        add_debug(path) {"traverse(#{element}): native: #{object.inspect}"}
        add_triple(path, subject, property, object) if subject && property
      else
        raise RDF::ReaderError, "Traverse to unknown element: #{element.inspect} of type #{element.class}"
      end
    end

    ##
    # Build a statement and hand it to the callback stored by {#each_statement}.
    # The object can be a literal, URI or bnode. No type validation is performed here.
    #
    # @param [String] path location within the document, used for debug output
    # @param [URI, BNode] subject the subject of the statement
    # @param [URI] predicate the predicate of the statement
    # @param [URI, BNode, Literal] object the object of the statement
    # @return [Object] whatever the statement callback returns
    def add_triple(path, subject, predicate, object)
      statement = RDF::Statement.new(subject, predicate, object)
      add_debug(path) {"statement: #{statement.to_ntriples}"}
      @callback.call(statement)
    end

    ##
    # Add debug event to debug array, if specified
    #
    # The message is printed when module-level debugging is on, and appended to
    # `@options[:debug]` when that option is an Array.
    #
    # @param [String, any] node label (typically the document path) prefixed to the message
    # @param [String] message
    # @yieldreturn [String] appended to message, to allow for lazy-evaluation of message
    def add_debug(node, message = "")
      return unless ::JSON::LD.debug? || @options[:debug]
      message = message + yield if block_given?
      puts "#{node}: #{message}" if JSON::LD::debug?
      @options[:debug] << "#{node}: #{message}" if @options[:debug].is_a?(Array)
    end

    ##
    # Parse a JSON context, merging it into the given EvaluationContext
    #
    # @param [EvaluationContext] ec
    #   context to update (modified in place)
    # @param [Hash{String => String,Hash}, String] context
    #   JSON representation of @context, or the location of a context document
    # @return [EvaluationContext] the updated `ec`
    # @raise [RDF::ReaderError]
    #   if a remote context is not valid JSON, or `@coerce` is not an associative array.
    #   Other failures while opening a remote context propagate unchanged.
    def parse_context(ec, context)
      # Load context document, if it is a string
      if context.is_a?(String)
        begin
          context = open_context(context.to_s)
        rescue JSON::ParserError => e
          raise RDF::ReaderError, "Failed to parse remote context at #{context}: #{e.message}"
        end
      end

      context.each do |key, value|
        add_debug("parse_context(#{key})") {value.inspect}
        case key
        when '@vocab' then ec.vocab = value
        when '@base'  then ec.base  = uri(value)
        when '@coerce'
          # Process after prefix mapping
        else
          # Spec confusion: The text indicates to merge each key-value pair into the active context. Is any
          # processing performed on the values. For instance, could a value be a CURIE, or {"@iri": <value>}?
          # Examples indicate that there is no such processing, and each value should be an absolute IRI. The
          # wording makes this unclear.
          ec.mappings[key.to_s] = value
        end
      end

      if context['@coerce']
        # Spec confusion: doc says to merge each key-value mapping to the local context's @coerce mapping,
        # overwriting duplicate values. In the case where a mapping is indicated to a list of properties
        # (e.g., { "@iri": ["foaf:homepage", "foaf:member"] }, does this overwrite a previous mapping
        # of { "@iri": "foaf:knows" }, or add to it.
        unless context['@coerce'].is_a?(Hash)
          raise RDF::ReaderError, "Expected @coerce to reference an associative array"
        end
        context['@coerce'].each do |type, property|
          add_debug("parse_context: @coerce") {"type=#{type}, prop=#{property}"}
          type_uri = expand_term(type, ec.vocab, ec).to_s
          [property].flatten.compact.each do |prop|
            p = expand_term(prop, ec.vocab, ec).to_s
            if type == '@list'
              # List is managed separate from types, as it is maintained in normal form.
              ec.list << p unless ec.list.include?(p)
            else
              ec.coerce[p] = type_uri
            end
          end
        end
      end

      ec
    end

    ##
    # Open a context document and parse it as JSON.
    #
    # Uses `URI.open` where open-uri provides it, because newer Rubies no longer
    # route URLs through `Kernel#open`; older Rubies fall back to the patched `open`.
    #
    # NOTE(review): the location comes from the document being parsed and a value
    # that does not look like a URL may end up in `Kernel#open` -- validate it
    # before parsing untrusted input.
    #
    # @param [String] location URL (or path) of the context document
    # @return [Object] the parsed JSON
    # @raise [JSON::ParserError] if the document is not valid JSON
    def open_context(location)
      if URI.respond_to?(:open)
        URI.open(location) {|f| JSON.load(f)}
      else
        open(location) {|f| JSON.load(f)}
      end
    end

    ##
    # Parse a List, emitting an rdf:first/rdf:rest chain terminated by rdf:nil.
    # An empty list is emitted as rdf:nil directly. The supplied array is not modified.
    #
    # @param [String] path
    #   location within JSON hash
    # @param [Array] list
    #   The Array to serialize as a list
    # @param [RDF::URI] subject
    #   Inherited subject
    # @param [RDF::URI] property
    #   Inherited property
    # @param [EvaluationContext] ec
    #   The active context
    def parse_list(path, list, subject, property, ec)
      add_debug(path) {"list: #{list.inspect}, s=#{subject.inspect}, p=#{property.inspect}, e=#{ec.inspect}"}

      if list.empty?
        add_triple("#{path}", subject, property, RDF.nil)
        return
      end

      node = RDF::Node.new
      add_triple("#{path}", subject, property, node)

      final_index = list.length - 1
      list.each_with_index do |list_item, index|
        traverse("#{path}", list_item, node, RDF.first, ec)
        # The last cell points at rdf:nil, every other cell at a fresh node
        rest = index == final_index ? RDF.nil : RDF::Node.new
        add_triple("#{path}", node, RDF.rest, rest)
        node = rest
      end
    end

    ##
    # Expand a term using the specified context
    #
    # A `_:` prefix yields a blank node, a prefix found in the context mappings is
    # replaced by its mapping, otherwise the term is resolved against `base` when
    # one is given.
    #
    # @param [String] term
    # @param [String] base Base to apply to URIs
    # @param [EvaluationContext] ec
    #
    # @return [RDF::URI, RDF::Node]
    # @see http://json-ld.org/spec/ED/20110507/#markup-of-rdf-concepts
    def expand_term(term, base, ec)
      #add_debug("expand_term", {"term=#{term.inspect}, base=#{base.inspect}, ec=#{ec.inspect}"}
      prefix, suffix = term.split(":", 2)
      if prefix == '_'
        bnode(suffix)
      elsif ec.mappings.has_key?(prefix)
        uri(ec.mappings[prefix] + suffix.to_s)
      elsif base
        base.respond_to?(:join) ? base.join(term) : uri(base + term)
      else
        uri(term)
      end
    end

    # Build an RDF::URI, applying the reader's validate/canonicalize/intern settings.
    #
    # @param [String, RDF::URI] value
    # @param [String] append optional reference joined onto `value`
    # @return [RDF::URI]
    def uri(value, append = nil)
      value = RDF::URI.new(value)
      value = value.join(append) if append
      value.validate! if validate?
      value.canonicalize! if canonicalize?
      value = RDF::URI.intern(value) if intern?
      value
    end

    # Keep track of allocated BNodes
    #
    # Don't actually use the name provided, to prevent name alias issues.
    # @param [String] value label of the blank node within the document
    # @return [RDF::Node]
    def bnode(value = nil)
      @bnode_cache ||= {}
      @bnode_cache[value.to_s] ||= RDF::Node.new
    end
  end
end