lib/rdf/vocab/extensions.rb in rdf-vocab-2.0.0 vs lib/rdf/vocab/extensions.rb in rdf-vocab-2.0.1

Legend: lines prefixed "-" are from the old version (rdf-vocab 2.0.0);
lines prefixed "+" are from the new version (rdf-vocab 2.0.1).

@@ -1,7 +1,7 @@ -require 'rdf' # frozen_string_literal: true +require 'rdf' require 'rdf/vocabulary' require 'rdf/vocab' # Monkey-patch RDF::Vocab.each to load all vocabularies @@ -18,18 +18,491 @@ def each(&block) # This is needed since all vocabulary classes are defined using # Ruby's autoloading facility, meaning that `@@subclasses` will be # empty until each subclass has been touched or require'd. RDF::Vocab::VOCABS.each do |n, params| - class_name = params.fetch(:class_name, n.upcase).to_sym begin require "rdf/vocab/#{n}" rescue LoadError # Transient error warn "Failed to load #{n}" end end _orig_each(&block) end + + begin + require 'rdf/turtle' + ## + # Generate Turtle representation, specific to vocabularies + # + # @param [RDF::Queryable] :graph Optional graph, otherwise uses statements from vocabulary. + # @param [Hash{#to_sym => String}] Prefixes to add to output + # @return [String] + def to_ttl(graph: nil, prefixes: nil) + output = [] + + # Find namespaces used in the vocabulary + graph = RDF::Graph.new {|g| each_statement {|s| g << s}} if graph.nil? || graph.empty? 
+ + prefixes = vocab_prefixes(graph).merge(prefixes || {}) + pfx_width = prefixes.keys.map(&:to_s).map(&:length).max + prefixes.each do |pfx, uri| + output << "@prefix %*s: <%s> .\n" % [pfx_width, pfx, uri] + end + + # Determine the category for each subject in the vocabulary graph + cats = subject_categories(graph) + + writer = RDF::Turtle::Writer.new(StringIO.new, prefixes: prefixes) + + { + ont: { + heading: "# #{__name__.split('::').last} Vocabulary definition\n" + }, + classes: { + heading: "# Class definitions\n" + }, + properties: { + heading: "# Property definitions\n" + }, + datatypes: { + heading: "# Datatype definitions\n" + }, + other: { + heading: "# Other definitions\n" + } + }.each do |key, hash| + next unless cats[key] + + output << "\n\n#{hash[:heading]}" + + cats[key].each do |subject| + po = {} + + # Group predicates with their values + graph.query(subject: subject) do |statement| + # Sanity check this, as these are set to an empty string if not defined. + next if [RDF::RDFS.label, RDF::RDFS.comment].include?(statement.predicate) && statement.object.to_s.empty? + po[statement.predicate] ||= [] + po[statement.predicate] << statement.object + end + + next if po.empty? + + po_list = [] + unless (types = po.delete(RDF.type)).empty? + po_list << 'a ' + types.map {|o| writer.format_term(o)}.join(", ") + end + + # Serialize other predicate/objects + po.each do |predicate, objects| + resource = predicate.qname ? predicate.pname : "<#{predicate}>" + po_list << resource + ' ' + objects.map {|o| writer.format_term(o)}.join(", ") + end + + # Output statements for this subject + subj = subject.qname ? 
subject.pname : "<#{subject}>" + output << "#{subj} " + po_list.join(";\n ") + "\n .\n" + end + end + + output.join("") + end + rescue LoadError + # No Turtle serialization unless gem loaded + end + + begin + require 'json/ld' + + ## + # Generate JSON-LD representation, specific to vocabularies + # + # @param [RDF::Queryable] :graph Optional graph, otherwise uses statements from vocabulary. + # @param [Hash{#to_sym => String}] Prefixes to add to output + # @return [String] + def to_jsonld(graph: nil, prefixes: nil) + context = {} + rdfs_context = ::JSON.parse %({ + "dc:title": {"@container": "@language"}, + "dc:description": {"@container": "@language"}, + "dc:date": {"@type": "xsd:date"}, + "rdfs:comment": {"@container": "@language"}, + "rdfs:domain": {"@type": "@vocab"}, + "rdfs:label": {"@container": "@language"}, + "rdfs:range": {"@type": "@vocab"}, + "rdfs:seeAlso": {"@type": "@id"}, + "rdfs:subClassOf": {"@type": "@vocab"}, + "rdfs:subPropertyOf": {"@type": "@vocab"}, + "schema:domainIncludes": {"@type": "@vocab"}, + "schema:rangeIncludes": {"@type": "@vocab"}, + "owl:equivalentClass": {"@type": "@vocab"}, + "owl:equivalentProperty": {"@type": "@vocab"}, + "owl:oneOf": {"@container": "@list", "@type": "@vocab"}, + "owl:imports": {"@type": "@id"}, + "owl:versionInfo": {"@type": "@id"}, + "owl:inverseOf": {"@type": "@vocab"}, + "owl:unionOf": {"@type": "@vocab", "@container": "@list"}, + "rdfs_classes": {"@reverse": "rdfs:isDefinedBy", "@type": "@id"}, + "rdfs_properties": {"@reverse": "rdfs:isDefinedBy", "@type": "@id"}, + "rdfs_datatypes": {"@reverse": "rdfs:isDefinedBy", "@type": "@id"}, + "rdfs_instances": {"@reverse": "rdfs:isDefinedBy", "@type": "@id"} + }) + rdfs_classes, rdfs_properties, rdfs_datatypes, rdfs_instances = [], [], [], [], [] + + ontology = { + "@context" => rdfs_context, + "@id" => to_uri.to_s + } + + # Find namespaces used in the vocabulary + graph = RDF::Graph.new {|g| each_statement {|s| g << s}} if graph.nil? || graph.empty? 
+ + prefixes = vocab_prefixes(graph).merge(prefixes || {}) + prefixes.each do |pfx, uri| + context[pfx.to_s] = uri.to_s unless pfx.to_s.empty? + end + + # Determine the category for each subject in the vocabulary graph + cats = subject_categories(graph) + + # Generate term definitions from graph subjects + cats.values.flatten.each do |term| + next unless Array(term.qname).length == 2 + context[term.qname.last.to_s] = term.to_uri.to_s + end + + # Parse the two contexts so we know what terms are in scope + jld_context = ::JSON::LD::Context.new.parse([context, rdfs_context]) + + { + ont: { + heading: "# #{__name__.split('::').last} Vocabulary definition\n", + bucket: ontology, + }, + classes: { + heading: "# Class definitions\n", + bucket: rdfs_classes, + rev_prop: "rdfs_classes" + }, + properties: { + heading: "# Property definitions\n", + bucket: rdfs_properties, + rev_prop: "rdfs_properties" + }, + datatypes: { + heading: "# Datatype definitions\n", + bucket: rdfs_datatypes, + rev_prop: "rdfs_datatypes" + }, + other: { + heading: "# Other definitions\n", + bucket: rdfs_instances, + rev_prop: "rdfs_instances" + } + }.each do |key, hash| + next unless cats[key] + + cats[key].each do |subject| + node = {"@id" => subject.pname} + po = {} + + # Group predicates with their values + graph.query(subject: subject) do |statement| + # Sanity check this, as these are set to an empty string if not defined. + next if [RDF::RDFS.label, RDF::RDFS.comment].include?(statement.predicate) && statement.object.to_s.empty? + po[statement.predicate] ||= [] + po[statement.predicate] << statement.object + end + + next if po.empty? 
+ + node['@type'] = po.delete(RDF.type).map {|t| jld_context.compact_iri(t, vocab: true)} + + po.each do |predicate, objects| + term = jld_context.compact_iri(predicate, vocab: true) + node[term] = if jld_context.container(term) == '@language' + lang_map = objects.inject({}) do |memo, o| + raise "Language-mapped term #{term} with non plain-literal #{o.inspect}" unless o.literal? && o.plain? + memo.merge(o.language.to_s => o.value) + end + # Don't use language map if there's only one entry with no language + lang_map = lang_map[""] if lang_map.keys == [""] + [lang_map] + else + objects.map do |o| + expanded_value = jld_context.expand_value(term, o) + jld_context.compact_value(term, expanded_value) + end + end + end + + node.each do |property, values| + case values.length + when 0 then node.delete(property) + when 1 then node[property] = values.first + end + end + + # Either set bucket from node, or append node to bucket + if hash[:bucket].is_a?(Hash) + hash[:bucket].merge!(node) + else + ontology[hash[:rev_prop]] ||= hash[:bucket] + hash[:bucket] << node + end + end + end + + # Serialize result + { + "@context" => context, + "@graph" => ontology + }.to_json(::JSON::LD::JSON_STATE) + end + rescue LoadError + # No JSON-LD serialization unless gem loaded + end + + ## + # Generate HTML+RDFa representation, specific to vocabularies. This uses generated JSON-LD and a Haml template. + # + # @param [RDF::Queryable] :graph Optional graph, otherwise uses statements from vocabulary. + # @param [Hash{#to_sym => String}] Prefixes to add to output + # @param [String, Hash] jsonld + # If not provided, the `to_jsonld` method is used to generate it. + # @param [String] template The path to a Haml or ERB template used to generate the output using the JSON-LD serialization + # @return [String] + def to_html(graph: nil, prefixes: nil, jsonld: nil, template: nil) + # Find namespaces used in the vocabulary + graph = RDF::Graph.new {|g| each_statement {|s| g << s}} if graph.nil? 
|| graph.empty? + + # Get JSON as an object + json = case jsonld + when String then ::JSON.parse(File.read jsonld) + when Hash then jsonld + else + ::JSON.parse(to_jsonld(graph: graph, prefixes: prefixes)) + end + raise "Expected JSON-LD data within the '@graph' key" unless json.has_key?('@graph') + + template ||= File.expand_path("../../../../etc/template.erb", __FILE__) + + prefixes = vocab_prefixes(graph).merge(prefixes || {}) + prefixes[:owl] = RDF::OWL.to_uri.to_s + + # Make sure ontology is typed + json['@graph']['@type'] ||= ['owl:Ontology'] + + jld_context = ::JSON::LD::Context.new.parse([json['@context'], json['@graph']['@context']]) + + # Expand the JSON-LD to normalize accesses + expanded = ::JSON::LD::API.expand(json).first + expanded.delete('@reverse') + + # Re-compact keys + expanded = expanded.inject({}) do |memo, (k, v)| + term = RDF::Vocabulary.find_term(k) + k = term.pname if term + memo.merge(k => v) + end + + # Normalize label accessors + expanded['rdfs:label'] ||= %w(dc:title dc11:title skos:prefLabel).inject(nil) do |memo, key| + memo || expanded[key] + end || [{'@value' => json['@graph']['@id']}] + %w(rdfs_classes rdfs_properties rdfs_datatypes rdfs_instances).each do |section| + next unless json['@graph'][section] + json['@graph'][section].each do |node| + node['rdfs:label'] ||= %w(dc:title dc11:title skos:prefLabel).inject do |memo, key| + memo || node[key] + end || [{'@value' => node['@id']}] + end + end + + # Expand each part separately, as well. 
+ %w(rdfs_classes rdfs_properties rdfs_datatypes rdfs_instances).each do |section| + next unless json['@graph'][section] + expanded_section = ::JSON::LD::API.expand(json['@graph'][section], expandContext: jld_context) + # Re-compact keys + expanded[section] = expanded_section.map do |node| + node.inject({}) do |memo, (k, v)| + term = RDF::Vocabulary.find_term(k) + k = term.pname if term + memo.merge(k => v) + end + end + end + + # Template invoked with expanded JSON-LD with outer object including `rdfs_classes`, `rdfs_properties`, and `rdf_instances` sections. + case template + when /.haml$/ + require 'haml' + haml = Haml::Engine.new(File.read(template)) + haml.render(self, ont: expanded, context: json['@context'], prefixes: prefixes) + when /.erb$/ + require 'erubis' + eruby = Erubis::FastEruby.new(File.read(template)) + result = eruby.evaluate(binding: self, ont: expanded, context: json['@context'], prefixes: prefixes) + else + raise "Unknown template type #{template}. Should have '.erb' or '.haml' extension" + end + end + + ## + # Create HTML for values (Helper method, needs to be public) + def value_to_html(property, value, tag) + value.map do |v| + %(<#{tag} property="#{property}") + + if v['@value'] + (v['@language'] ? %( language="#{v['@language']}") : "") + + (v['@type'] ? %( datatype="#{RDF::URI(v['@type']).pname}") : "") + + %(>#{v['@value']}) + elsif v['@id'] + %( resource="#{RDF::URI(v['@id']).pname}">#{RDF::URI(v['@id']).pname}) + else + raise "Unknown value type: #{v.inspect}, #{property}" + end + + %(</#{tag}>) + end.join("\n") + end + private + + ## + # Prefixes used in this vocabulary + # + # @param [RDF::Graph] graph + # @return [Hash{Symbol => RDF::URI}] + def vocab_prefixes(graph) + vocabs = graph. + terms. + select(&:uri?). + map {|u| RDF::Vocabulary.find(u)}. + uniq. + compact. 
+ sort_by(&:__prefix__) + vocabs << RDF::XSD # incase we need it for a literal + + # Generate prefix definitions + vocabs.inject({}) do |memo, v| + memo.merge(v.__prefix__ => v.to_uri) + end + end + + ## + # Categorize each subject in the graph + # + # @param [RDF::Graph] graph + # @return [Hash{RDF::URI => Symbol}] + def subject_categories(graph) + cats = {} + categorized = {} + uncategorized = {} + graph.query(predicate: RDF.type) do |statement| + # Only serialize statements that are in the defined vocabulary + next unless statement.subject.start_with?(self.to_uri) + case statement.object + when RDF.Property, + RDF::OWL.AnnotationProperty, + RDF::OWL.DatatypeProperty, + RDF::OWL.FunctionalProperty, + RDF::OWL.ObjectProperty, + RDF::OWL.OntologyProperty + (cats[:properties] ||= []) << statement.subject unless categorized[statement.subject] + categorized[statement.subject] = true + when RDF::RDFS.Class, RDF::OWL.Class + (cats[:classes] ||= []) << statement.subject unless categorized[statement.subject] + categorized[statement.subject] = true + when RDF::RDFS.Datatype, RDF::OWL.DataRange + (cats[:datatypes] ||= []) << statement.subject unless categorized[statement.subject] + categorized[statement.subject] = true + when RDF::OWL.Ontology + (cats[:ont] ||= []) << statement.subject unless categorized[statement.subject] + categorized[statement.subject] = true + else + if statement.subject == self.to_uri + (cats[:ont] ||= []) << statement.subject unless categorized[statement.subject] + categorized[statement.subject] = true + else + uncategorized[statement.subject] = true + end + end + end + + # Add all uncategorized subjects as :other + uncat = (uncategorized.keys - categorized.keys) + cats[:other] = uncat unless uncat.empty? 
+ + cats + end end + + module VocabFormatExtensions + ## + # Hash of CLI commands appropriate for this format + # @return [Hash{Symbol => Lambda(Array, Hash)}] + def cli_commands + super.merge({ + :"gen-vocab" => { + description: "Generate a vocabulary using a special serialization. Accepts an input graph, or serializes built-in vocabulary", + parse: false, # Only parse if there are input files, otherwise, uses vocabulary + help: "gen-vocab --uri <vocabulary-URI> [--output format ttl|jsonld|html] [options] [files]\n", + lambda: ->(files, options) do + $stdout.puts "Generate Vocabulary" + raise ArgumentError, "Must specify vocabulary URI" unless options[:base_uri] + + # Parse input graphs, if repository is not already created + if RDF::CLI.repository.empty? && !files.empty? + RDF::CLI.parse(files, options) do |reader| + RDF::CLI.repository << reader + end + end + + # Lookup vocabulary, or generate a new vocabulary from this URI + vocab = RDF::Vocabulary.find(options[:base_uri]) || begin + raise ArgumentError, "Must specify vocabulary prefix if vocabulary not built-in" unless options[:prefix] + RDF::Vocabulary.from_graph(RDF::CLI.repository, url: options[:base_uri], class_name: options[:prefix].to_s.upcase) + end + + prefixes = {} + prefixes[options[:prefix]] = options[:base_uri] if options[:prefix] + out = options[:output] || $stdout + case options[:output_format] + when :ttl, nil then out.write vocab.to_ttl(graph: RDF::CLI.repository, prefixes: prefixes) + when :jsonld then out.write vocab.to_jsonld(graph: RDF::CLI.repository, prefixes: prefixes) + when :html then out.write vocab.to_html(graph: RDF::CLI.repository, prefixes: prefixes, template: options[:template]) + else + # Use whatever writer we find + writer = RDF::Writer.for(options[:output_format]) || RDF::NTriples::Writer + writer.new(out, options) do |w| + if RDF::CLI.repository.empty? 
+ vocab.each_statement {|s| w << s} + else + w << RDF::CLI.repository + end + end + end + end, + options: [ + RDF::CLI::Option.new( + symbol: :prefix, + datatype: String, + on: ["--prefix PREFIX"], + description: "Prefix associated with vocabulary, if not built-in."), + RDF::CLI::Option.new( + symbol: :template, + datatype: String, + on: ["--template TEMPLATE"], + description: "Path to local template for generating HTML, either Haml or ERB, depending on file extension.\n" + + "See https://github.com/ruby-rdf/rdf-vocab/tree/develop/etc for built-in templates."), + ] + } + }) + end + end + + # Add cli_commands as class method to RDF::Vocabulary::Format + # TODO: in Ruby 2.0, `prepend` seems to be a private method of the class singleton; works okay elsewhere. + Format.singleton_class.send(:prepend, VocabFormatExtensions) end end \ No newline at end of file