# Monkey patch that makes RDF strings built from heredocs prettier ;)
class String
  # Removes the first line's leading whitespace from the start of every line.
  #
  # @return [String] a new string; the receiver is left untouched
  def unindent
    gsub(/^#{self[/\A\s*/]}/, '')
  end
end

module R2RDF
  class Dataset
    # Generates Turtle text that describes a dataset using the RDF Data Cube
    # (qb:) vocabulary.
    #
    # NOTE(review): sanitize, sanitize_hash, to_resource, to_literal,
    # strip_uri and strip_prefixes are not defined in this file; presumably
    # they are provided by R2RDF::Parser -- confirm.
    module DataCube
      include R2RDF::Parser

      # Default options merged underneath whatever the caller supplies.
      #
      # @return [Hash]
      def defaults
        {
          type: :dataframe,
          encode_nulls: false,
          base_url: "http://www.rqtl.org",
        }
      end

      # Converts measure, dimension and code names into Turtle terms.
      #
      # Measures and dimensions: http:// URIs are wrapped in <>, prefix:name
      # terms pass through, anything else is placed under the prop: prefix.
      #
      # Codes: when the first entry is an Array the nested arrays are kept and
      # only http:// elements are wrapped in <>; otherwise each code becomes a
      # [name, "code:<downcased>", "code:<Capitalized>"] triple.
      #
      # @param measures [Array<String>]
      # @param dimensions [Array<String>]
      # @param codes [Array<String>, Array<Array<String>>]
      # @param options [Hash] accepted for signature parity; not read here
      # @return [Array(Array, Array, Array)] [measures, dimensions, codes]
      def generate_resources(measures, dimensions, codes, options={})
        newm = measures.map { |m| qualify_term(m) { "prop:#{m}" } }
        newd = dimensions.map { |d| qualify_term(d) { "prop:#{d}" } }

        newc =
          if codes.first.is_a? Array
            codes.map do |c|
              c.map { |el| el =~ /^http:\/\// ? "<#{el}>" : el }
            end
          else
            codes.map do |c|
              name = sanitize(c).first
              ["#{name}", "code:#{name.downcase}", "code:#{name.downcase.capitalize}"]
            end
          end

        [newm, newd, newc]
      end

      # Rewrites the values of coded columns as Turtle resources; columns that
      # are not listed in +codes+ are passed through as-is.
      #
      # @param codes [Array] names of coded columns (run through sanitize)
      # @param data [Hash{String => Array}] column name => column values
      # @param var [String] not read here
      # @param options [Hash] not read here
      # @return [Hash{String => Array}] a new hash; +data+ is not modified
      def encode_data(codes,data,var,options={})
        codes = sanitize(codes)
        data.each_with_object({}) do |(k, v), new_data|
          new_data[k] =
            if codes.include?(k)
              v.map { |val| qualify_term(val) { "<code/#{k.downcase}/#{val}>" } }
            else
              v
            end
        end
      end

      # Resolves a vocabulary from an http:// URI string or from the name of a
      # constant defined under RDF.
      #
      # @param vocab [String, Symbol]
      # @param options [Hash] not read here
      # @return [Object, nil] a new RDF::Vocabulary for a URI, the RDF
      #   constant whose inspect output starts with "RDF::Vocabulary", or nil
      # @raise [NameError] if +vocab+ is not a valid constant name
      def vocabulary(vocab,options={})
        if vocab.is_a?(String) && vocab =~ /^http:\/\//
          RDF::Vocabulary.new(vocab)
        # The parentheses are required: without them Ruby treats the entire
        # `&&` expression as the argument to const_defined?.
        elsif RDF.const_defined?(vocab.to_sym) && RDF.const_get(vocab.to_sym).inspect =~ /^RDF::Vocabulary/
          RDF.const_get(vocab)
        else
          nil
        end
      end

      # Builds the complete Turtle document: prefixes, data structure
      # definition, dataset, dimension and measure properties, code lists,
      # concept codes and observations, in that order. The output of
      # component_specifications is not included.
      #
      # @param measures [Array<String>]
      # @param dimensions [Array<String>]
      # @param codes [Array]
      # @param data [Hash{String => Array}] column name => column values
      # @param observation_labels [Array] one label per observation row
      # @param var [String] dataset name
      # @param options [Hash] overrides for {#defaults}
      # @return [String]
      def generate(measures, dimensions, codes, data, observation_labels, var, options={})
        var = sanitize([var]).first
        data = sanitize_hash(data)

        str = prefixes(var,options)
        str << data_structure_definition(measures, dimensions, codes, var, options)
        str << dataset(var, options)
        dimension_properties(dimensions, codes, var, options).each { |chunk| str << chunk }
        measure_properties(measures, var, options).each { |chunk| str << chunk }
        code_lists(codes, data, var, options).each { |chunk| str << chunk }
        concept_codes(codes, data, var, options).each { |chunk| str << chunk }
        observations(measures, dimensions, codes, data, observation_labels, var, options).each { |chunk| str << chunk }
        str
      end

      # Emits the @base and @prefix declarations for the document.
      #
      # @param var [String] dataset name, used in the ns:, cs: and code: prefixes
      # @param options [Hash] :base_url overrides the default base URL
      # @return [String]
      def prefixes(var, options={})
        var = sanitize([var]).first
        options = defaults().merge(options)
        base = options[:base_url]
        <<-EOF.unindent
          @base <#{base}/ns/dc/> .
          @prefix ns: <#{base}/ns/dataset/#{var}#> .
          @prefix qb: <http://purl.org/linked-data/cube#> .
          @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
          @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
          @prefix prop: <#{base}/dc/properties/> .
          @prefix dct: <http://purl.org/dc/terms/> .
          @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
          @prefix cs: <#{base}/dc/dataset/#{var}/cs/> .
          @prefix code: <#{base}/dc/dataset/#{var}/code/> .
          @prefix owl: <http://www.w3.org/2002/07/owl#> .
          @prefix skos: <http://www.w3.org/2004/02/skos/core#> .
          @prefix foaf: <http://xmlns.com/foaf/0.1/> .
          @prefix org: <http://www.w3.org/ns/org#> .
          @prefix prov: <http://www.w3.org/ns/prov#> .

        EOF
      end

      # Emits the qb:DataStructureDefinition listing one qb:component per
      # dimension followed by one per measure.
      #
      # @return [String]
      def data_structure_definition(measures,dimensions,codes,var,options={})
        var = sanitize([var]).first
        options = defaults().merge(options)
        rdf_measures, rdf_dimensions, _rdf_codes = generate_resources(measures, dimensions, codes, options)

        str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
        rdf_dimensions.each { |d| str << " qb:component [ qb:dimension #{d} ] ;\n" }
        rdf_measures.each { |m| str << " qb:component [ qb:measure #{m} ] ;\n" }
        # Every line so far ends in ";\n"; turn the final ';' into the
        # statement terminator.
        str[-2]='.'
        str<<"\n"
        str
      end

      # Emits the qb:DataSet statement linking the dataset to its structure
      # definition.
      #
      # @param var [String] dataset name
      # @param options [Hash] not read here
      # @return [String]
      def dataset(var,options={})
        var = sanitize([var]).first
        <<-EOF.unindent
          ns:dataset-#{var} a qb:DataSet ;
            rdfs:label "#{var}"@en ;
            qb:structure ns:dsd-#{var} .

        EOF
      end

      # Emits one qb:ComponentSpecification per dimension, then one per
      # measure.
      #
      # @param measure_names [Array<String>]
      # @param dimension_names [Array<String>]
      # @param var [String] not read here
      # @param options [Hash] not read here
      # @return [Array<String>]
      def component_specifications(measure_names, dimension_names, var, options={})
        dimension_specs = dimension_names.map do |d|
          <<-EOF.unindent
            cs:#{d} a qb:ComponentSpecification ;
              rdfs:label "#{d} Component" ;
              qb:dimension prop:#{d} .

          EOF
        end

        measure_specs = measure_names.map do |n|
          <<-EOF.unindent
            cs:#{n} a qb:ComponentSpecification ;
              rdfs:label "#{n} Component" ;
              qb:measure prop:#{n} .

          EOF
        end

        dimension_specs + measure_specs
      end

      # Emits a qb:DimensionProperty per dimension. Dimensions that also
      # appear among the codes additionally get qb:codeList and rdfs:range.
      #
      # @return [Array<String>]
      def dimension_properties(dimensions, codes, var, options={})
        options = defaults().merge(options)
        _rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], dimensions, codes, options)

        # Code names with any surrounding <> removed, so they can be compared
        # with the raw dimension names.
        dimension_codes = rdf_codes.map do |c|
          c[0] =~ /^<http:/ ? c[0][1..-2] : c[0]
        end

        rdf_dimensions.each_with_index.map do |d, i|
          code_index = dimension_codes.index(dimensions[i])
          if code_index
            code = rdf_codes[code_index]
            <<-EOF.unindent
              #{d} a rdf:Property, qb:DimensionProperty ;
                rdfs:label "#{strip_prefixes(strip_uri(d))}"@en ;
                qb:codeList #{code[1]} ;
                rdfs:range #{code[2]} .

            EOF
          else
            <<-EOF.unindent
              #{d} a rdf:Property, qb:DimensionProperty ;
                rdfs:label "#{strip_prefixes(strip_uri(d))}"@en .

            EOF
          end
        end
      end

      # Emits a qb:MeasureProperty per measure.
      #
      # @return [Array<String>]
      def measure_properties(measures, var, options={})
        options = defaults().merge(options)
        rdf_measures = generate_resources(measures, [], [], options)[0]

        rdf_measures.map do |m|
          <<-EOF.unindent
            #{m} a rdf:Property, qb:MeasureProperty ;
              rdfs:label "#{strip_prefixes(strip_uri(m))}"@en .

          EOF
        end
      end

      # Emits one qb:Observation per entry of +observation_labels+.
      #
      # An observation with a nil dimension or measure value is left out
      # unless options[:encode_nulls] is set. When one is left out,
      # options[:raise_nils] raises and options[:whiny_nils] prints a message.
      # options[:no_labels] suppresses the rdfs:label line.
      #
      # @param data [Hash{String => Array}] column name => column values,
      #   indexed in step with +observation_labels+
      # @return [Array<String>]
      # @raise [RuntimeError] for an incomplete observation when
      #   options[:raise_nils] is set and options[:encode_nulls] is not
      def observations(measures, dimensions, codes, data, observation_labels, var, options={})
        var = sanitize([var]).first
        measures = sanitize(measures)
        dimensions = sanitize(dimensions)
        options = defaults().merge(options)
        rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
        data = encode_data(codes, data, var, options)
        obs = []

        dimension_codes = rdf_codes.map do |c|
          c[0] =~ /^<http:/ ? c[0][1..-2] : c[0]
        end

        observation_labels.each_with_index do |r, i|
          contains_nulls = false
          str = <<-EOF.unindent
            ns:obs#{r} a qb:Observation ;
              qb:dataSet ns:dataset-#{var} ;
          EOF
          str << " rdfs:label \"#{r}\" ;\n" unless options[:no_labels]

          dimensions.each_with_index do |d, j|
            contains_nulls |= data[d][i].nil?
            # Coded dimensions are written as resources, the rest as literals.
            if dimension_codes.include? d
              str << " #{rdf_dimensions[j]} #{to_resource(data[d][i], options)} ;\n"
            else
              str << " #{rdf_dimensions[j]} #{to_literal(data[d][i], options)} ;\n"
            end
          end

          measures.each_with_index do |m, j|
            contains_nulls |= data[m][i].nil?
            str << " #{rdf_measures[j]} #{to_literal(data[m][i], options)} ;\n"
          end

          str << " .\n\n"

          if contains_nulls && !options[:encode_nulls]
            if options[:raise_nils]
              raise "missing component for observation, skipping: #{str}, "
            elsif options[:whiny_nils]
              puts "missing component for observation, skipping: #{str}, "
            end
          else
            obs << str
          end
        end

        obs
      end

      # Emits, for every code, a code-list class and a skos:ConceptScheme
      # whose top concepts are the distinct values of that code's column.
      #
      # @return [Array<String>]
      def code_lists(codes, data, var, options={})
        options = defaults().merge(options)
        _rdf_measures, _rdf_dimensions, rdf_codes = generate_resources([], [], codes, options)
        data = encode_data(codes, data, var, options)

        rdf_codes.map do |code|
          # Key into +data+: the code name without any surrounding <>.
          refcode = code[0] =~ /^<.+>$/ ? code[0][1..-2] : code[0]
          label = strip_prefixes(strip_uri(code[1]))

          str = <<-EOF.unindent
            #{code[2]} a rdfs:Class, owl:Class;
              rdfs:subClassOf skos:Concept ;
              rdfs:label "Code list for #{label} - codelist class"@en;
              rdfs:comment "Specifies the #{label} for each observation";
              rdfs:seeAlso #{code[1]} .

            #{code[1]} a skos:ConceptScheme;
              skos:prefLabel "Code list for #{label} - codelist scheme"@en;
              rdfs:label "Code list for #{label} - codelist scheme"@en;
              skos:notation "CL_#{label.upcase}";
              skos:note "Specifies the #{label} for each observation";
          EOF

          data[refcode].uniq.each do |value|
            next if value.nil? && !options[:encode_nulls]
            str << " skos:hasTopConcept #{to_resource(value,options)} ;\n"
          end

          str << " .\n\n"
          str
        end
      end

      # Emits a skos:Concept for each distinct value of every coded column.
      #
      # @return [Array<String>]
      def concept_codes(codes, data, var, options={})
        options = defaults().merge(options)
        _rdf_measures, _rdf_dimensions, rdf_codes = generate_resources([], [], codes, options)
        concepts = []
        data = encode_data(codes, data, var, options)

        rdf_codes.each do |code|
          refcode = code[0] =~ /^<.+>$/ ? code[0][1..-2] : code[0]

          data[refcode].uniq.each do |value|
            next if value.nil? && !options[:encode_nulls]
            # The label comes from the value being described. Indexing the
            # full column with a position from the de-duplicated list picks
            # the wrong entry as soon as the column contains a repeat.
            concepts << <<-EOF.unindent
              #{to_resource(value,options)} a skos:Concept, #{code[2]};
                skos:topConceptOf #{code[1]} ;
                skos:prefLabel "#{strip_uri(value)}" ;
                skos:inScheme #{code[1]} .

            EOF
          end
        end

        concepts
      end

      # Debug helper: shortens fully expanded www.rqtl.org URIs in a Turtle
      # string back to their prop:, <code/...> and code: forms.
      #
      # @param turtle_string [String]
      # @return [String]
      def abbreviate_known(turtle_string)
        turtle_string
          .gsub(/<http:\/\/www\.rqtl\.org\/dc\/properties\/(\S+)>/, 'prop:\1')
          .gsub(/<http:\/\/www.rqtl.org\/ns\/dc\/code\/(\S+)\/(\S+)>/, '<code/\1/\2>')
          .gsub(/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\S+)>/, 'code:\2')
      end

      private

      # Wraps an http:// URI in <>, returns a prefix:name term unchanged, and
      # otherwise returns whatever the block produces.
      #
      # NOTE(review): the `A-z` range also matches [ \ ] ^ _ and `. It is kept
      # as-is because narrowing it to `A-Z` would stop names containing
      # underscores from being treated as already-prefixed.
      def qualify_term(term)
        if term =~ /^http:\/\//
          "<#{term}>"
        elsif term =~ /^[a-zA-z]+:[a-zA-z]+$/
          term
        else
          yield
        end
      end
    end
  end
end