lib/sxp/reader/sparql.rb in sxp-0.0.13 vs lib/sxp/reader/sparql.rb in sxp-0.0.14
- old
+ new
@@ -6,77 +6,233 @@
#
# Requires [RDF.rb](http://rdf.rubyforge.org/).
#
# @see http://openjena.org/wiki/SSE
class SPARQL < Extended
+ # Alias for rdf:type
+ A = /^a$/
+ # Base token, causes next URI to be treated as the `base_uri` for further URI expansion
+ BASE = /^base$/i
+ # Prefix token, causes following prefix and URI pairs to be used for transforming
+ # {PNAME} tokens into URIs.
+ PREFIX = /^prefix$/i
NIL = /^nil$/i
FALSE = /^false$/i
TRUE = /^true$/i
EXPONENT = /[eE][+-]?[0-9]+/
- DECIMAL = /^[+-]?(\d*)?\.\d*#{EXPONENT}?$/
+ DECIMAL = /^[+-]?(\d*)?\.\d*$/
+ DOUBLE = /^[+-]?(\d*)?\.\d*#{EXPONENT}$/
+ # BNode with identifier
BNODE_ID = /^_:([A-Za-z][A-Za-z0-9]*)/ # FIXME
+ # Anonymous BNode
BNODE_NEW = /^_:$/
- VAR_ID = /^\?([A-Za-z][A-Za-z0-9]*)/ # FIXME
- VAR_GEN = /^\?\?([0-9]+)/
- VAR_NEW = '??'
+ # Distinguished variable with an optional name
+ VAR_ID = /^\?([A-Za-z][A-Za-z0-9]*)?/ # FIXME
+ # Non-distinguished variable with an optional identifier
+ ND_VAR = /^\?\?([0-9]*)/
+ # A URI reference, subject to expansion using `base_uri`
URIREF = /^<([^>]+)>/
+ # A QName, subject to expansion to URIs using {PREFIX}
+ PNAME = /([^:]*):([^:]*)/
+
+ RDF_TYPE = (a = RDF.type.dup; a.lexical = 'a'; a).freeze
##
+ # Base URI as specified or when parsing parsing a BASE token using the immediately following
+ # token, which must be a URI.
+ attr_accessor :base_uri
+
+ ##
+ # Prefixes defined while parsing
+ # @return [Hash{Object => RDF::URI}]
+ attr_accessor :prefixes
+
+ ##
+ # Defines the given named URI prefix for this parser.
+ #
+ # @example Defining a URI prefix
+ # parser.prefix :dc, RDF::URI('http://purl.org/dc/terms/')
+ #
+ # @example Returning a URI prefix
+ # parser.prefix(:dc) #=> RDF::URI('http://purl.org/dc/terms/')
+ #
+ # @overload prefix(name, uri)
+ # @param [Symbol, #to_s] name
+ # @param [RDF::URI, #to_s] uri
+ #
+ # @overload prefix(name)
+ # @param [Symbol, #to_s] name
+ #
+ # @return [RDF::URI]
+ def prefix(name, uri = nil)
+ name = name.to_s.empty? ? nil : (name.respond_to?(:to_sym) ? name.to_sym : name.to_s.to_sym)
+ uri.nil? ? @prefixes[name] : @prefixes[name] = uri
+ end
+
+ ##
+ # Initializes the reader.
+ #
+ # @param [IO, StringIO, String] input
+ # @param [Hash{Symbol => Object}] options
+ def initialize(input, options = {}, &block)
+ super { @prefixes = {}; @bnodes = {}; @list_depth = 0 }
+
+ if block_given?
+ case block.arity
+ when 1 then block.call(self)
+ else self.instance_eval(&block)
+ end
+ end
+ end
+
+ ##
+ # Reads SSE Tokens, including {RDF::Literal}, {RDF::URI} and RDF::Node.
+ #
+ # Performs forward reference for prefix and base URI representations and saves in
+ # {#base_uri} and {#prefixes} accessors.
+ #
+ # Transforms tokens matching a {PNAME} pattern into {RDF::URI} instances if a match is
+ # found with a previously identified {PREFIX}.
# @return [Object]
def read_token
case peek_char
- when ?" then [:atom, read_rdf_literal] # "
- when ?< then [:atom, read_rdf_uri]
- else super
+ when ?" then [:atom, read_rdf_literal] # "
+ when ?< then [:atom, read_rdf_uri]
+ else
+ tok = super
+
+ # If we just parsed "PREFIX", and this is an opening list, then
+ # record list depth and process following as token, URI pairs
+ #
+ # Once we've closed the list, go out of prefix mode
+ if tok.is_a?(Array) && tok[0] == :list
+ if '(['.include?(tok[1])
+ @list_depth += 1
+ else
+ @list_depth -= 1
+ @prefix_depth = nil if @prefix_depth && @list_depth < @prefix_depth
+ end
+ end
+
+ if tok.is_a?(Array) && tok[0] == :atom && tok[1].is_a?(Symbol)
+ value = tok[1].to_s
+
+ # We previously parsed a PREFIX, this will be the map value
+ @parsed_prefix = value.chop if @prefix_depth && @prefix_depth > 0
+
+ # If we just saw PREFIX, then this starts the parsing mode
+ @prefix_depth = @list_depth + 1 if value =~ PREFIX
+
+ # If the token is of the form 'prefix:suffix', create a URI and give it the
+ # token as a QName
+ if value.to_s =~ PNAME && base = prefix($1)
+ suffix = $2
+ #STDERR.puts "read_tok lexical: pfx: #{$1.inspect} => #{prefix($1).inspect}, sfx: #{suffix.inspect}"
+ suffix = suffix.sub(/^\#/, "") if base.to_s.index("#")
+ uri = RDF::URI(base.to_s + suffix)
+ #STDERR.puts "read_tok lexical uri: #{uri.inspect}"
+
+ # Cause URI to be serialized as a lexical
+ uri.lexical = value
+ [:atom, uri]
+ else
+ tok
+ end
+ else
+ tok
+ end
end
end
##
+ # Reads literals corresponding to SPARQL/Turtle/Notation-3 syntax
+ #
+ # @example
+ # "a plain literal"
+ # "a literal with a language"@en
+ # "a typed literal"^^<http://example/>
+ # "a typed literal with a PNAME"^^xsd:string
+ #
# @return [RDF::Literal]
def read_rdf_literal
value = read_string
options = case peek_char
when ?@
skip_char # '@'
- {:language => read_atom}
+ {:language => read_atom.downcase}
when ?^
2.times { skip_char } # '^^'
- {:datatype => read_rdf_uri} # TODO: support prefixed names
+ {:datatype => read_token.last}
else {}
end
RDF::Literal(value, options)
end
##
+ # Reads a URI in SPARQL/Turtle/Notation-3 syntax
+ #
+ # @example
+ # <http://example/>
+ #
# @return [RDF::URI]
def read_rdf_uri
buffer = String.new
skip_char # '<'
return :< if (char = peek_char).nil? || char.chr !~ ATOM # FIXME: nasty special case for the '< symbol
return :<= if peek_char.chr.eql?(?=.chr) && read_char # FIXME: nasty special case for the '<= symbol
until peek_char == ?>
buffer << read_char # TODO: unescaping
end
skip_char # '>'
- RDF::URI(buffer)
+
+ # If we have a base URI, use that when constructing a new URI
+ uri = if self.base_uri
+ u = self.base_uri.join(buffer)
+ u.lexical = "<#{buffer}>" unless u.to_s == buffer # So that it can be re-serialized properly
+ u
+ else
+ RDF::URI(buffer)
+ end
+
+ # If we previously parsed a "BASE" element, then this URI is used to set that value
+ if @parsed_base
+ self.base_uri = uri
+ @parsed_base = nil
+ end
+
+ # If we previously parsed a "PREFIX" element, associate this URI with the prefix
+ if @parsed_prefix
+ prefix(@parsed_prefix, uri)
+ @parsed_prefix = nil
+ end
+
+ uri
end
##
+ # Reads an SSE Atom
+ #
+ # Atoms parsed including `base`, `prefix`, `true`, `false`, numeric, BNodes and variables.
+ #
+ # Creates {RDF::Literal}, RDF::Node, or {RDF::Query::Variable} instances where appropriate.
+ #
# @return [Object]
def read_atom
case buffer = read_literal
when '.' then buffer.to_sym
+ when A then RDF_TYPE
+ when BASE then @parsed_base = true; buffer.to_sym
when NIL then nil
when FALSE then RDF::Literal(false)
when TRUE then RDF::Literal(true)
- when DECIMAL then RDF::Literal(Float(buffer[-1].eql?(?.) ? buffer + '0' : buffer))
- when INTEGER then RDF::Literal(Integer(buffer))
- when BNODE_ID then RDF::Node($1)
+ when DOUBLE then RDF::Literal::Double.new(buffer)
+ when DECIMAL then RDF::Literal::Decimal.new(buffer)
+ when INTEGER then RDF::Literal::Integer.new(buffer)
+ when BNODE_ID then @bnodes[$1] ||= RDF::Node($1)
when BNODE_NEW then RDF::Node.new
- when VAR_ID then RDF::Query::Variable.new($1)
- when VAR_GEN then RDF::Query::Variable.new("?#{$1}") # FIXME?
- when VAR_NEW then RDF::Query::Variable.new
+ when ND_VAR then variable($1, false)
+ when VAR_ID then variable($1, true)
else buffer.to_sym
end
end
##
@@ -87,9 +243,35 @@
when /\s+/ then skip_char
when /;/ then skip_line
when /#/ then skip_line
else break
end
+ end
+ end
+
+ ##
+ # Return variable allocated to an ID.
+ # If no ID is provided, a new variable
+ # is allocated. Otherwise, any previous assignment will be used.
+ #
+ # The variable has a #distinguished? method applied depending on if this
+ # is a disinguished or non-distinguished variable. Non-distinguished
+ # variables are effectively the same as BNodes.
+ # @return [RDF::Query::Variable]
+ def variable(id, distinguished = true)
+ id = nil if id.to_s.empty?
+
+ if id
+ @vars ||= {}
+ @vars[id] ||= begin
+ v = RDF::Query::Variable.new(id)
+ v.distinguished = distinguished
+ v
+ end
+ else
+ v = RDF::Query::Variable.new
+ v.distinguished = distinguished
+ v
end
end
end # SPARQL
end; end # SXP::Reader