# PubliSci::Dataset::DataCube
#
# Module whose methods build, as plain Ruby strings, the pieces of a Turtle
# document that uses qb: terms: the @base/@prefix header, a
# qb:DataStructureDefinition, a qb:DataSet, qb:ComponentSpecification entries,
# code lists (skos:ConceptScheme), skos:Concept entries and observations.
# `generate` sanitizes `var` and `data` and appends the pieces in this order:
# prefixes, data_structure_definition, dataset, component_specifications,
# dimension_properties, measure_properties, code_lists, concept_codes,
# observations.
#
# Helpers called here but not defined in this file (sanitize, sanitize_hash,
# strip_uri, strip_prefixes, to_resource, to_literal) presumably come from the
# included PubliSci::Parser -- TODO confirm.
#
# String#unindent (monkey patch): removes, from the start of every line, the
# whitespace run found at the very beginning of the string; it is chained onto
# the heredocs below.
#
# defaults: returns { type: :dataframe, encode_nulls: false, base_url: ... };
# most methods start with `defaults().merge(options)`.
#
# generate_resources(measures, dimensions, codes, options): returns
# [measures, dimensions, codes] rewritten as Turtle terms. Values starting with
# http:// are wrapped in <>, values shaped like prefix:name are kept, anything
# else gets a "prop:" prefix. If codes.first is an Array the nested elements are
# only <>-wrapped when they are http:// URIs; otherwise each code becomes
# [name, "code:<downcased>", "code:<downcased, capitalized>"].
#
# component_gen(args, options): accepts one value or an array; replaces "prop:"
# with "cs:" and rewrites "<base_url/.../name>" to "cs:name".
#
# encode_data(codes, data, var, options): returns a new hash; for keys listed
# in sanitize(codes), values that are neither http:// URIs nor prefix:name
# become "". The `var` and `options` parameters are not used by the body.
#
# data_structure_definition: `str[-2]='.'` swaps the ';' that precedes the
# final newline for '.', terminating the statement. `rdf_codes` is unused.
#
# code_lists / concept_codes: nil values are skipped unless
# options[:encode_nulls] is set. In the visible tail of `observations`, an
# observation containing nil is dropped when encode_nulls is false (raising if
# options[:raise_nils], printing via puts if options[:whiny_nils]).
#
# NOTE(review): `vocabulary` calls `RDF.const_defined? vocab.to_sym && ...`
# without parentheses, so the whole `&&` expression is parsed as the argument
# of const_defined?; this looks unintended -- confirm and parenthesize.
#
# NOTE(review): the character class `[a-zA-z]` (used three times) spans ASCII
# 'A'..'z' and therefore also accepts `[`, `\`, `]`, `^`, `_` and backtick;
# `[a-zA-Z]` was probably intended -- confirm before changing, since `_` is
# currently accepted.
#
# NOTE(review): this copy of the file appears damaged: newlines are collapsed
# and spans that contained angle-bracketed text seem to be missing (the
# namespace IRIs after each `@prefix` in `prefixes`, everything between the
# first regex of `dimension_properties` and the middle of `observations`, and
# the three regexes in `abbreviate_known`, which are empty yet use \1 / \2).
# Restore those spans from version control before making code changes.
#
#monkey patch to make rdf string w/ heredocs prettier ;) class String def unindent gsub /^#{self[/\A\s*/]}/, '' end end module PubliSci class Dataset module DataCube include PubliSci::Parser def defaults { type: :dataframe, encode_nulls: false, base_url: "http://www.rqtl.org", } end def generate_resources(measures, dimensions, codes, options={}) newm = measures.map {|m| if m =~ /^http:\/\// "<#{m}>" elsif m =~ /^[a-zA-z]+:[a-zA-z]+$/ m else "prop:#{m}" end } newc = [] newd = dimensions.map{|d| if d =~ /^http:\/\// # newc << "<#{d}>" if codes.include? d "<#{d}>" elsif d =~ /^[a-zA-z]+:[a-zA-z]+$/ d else # newc << "prop:#{d}" if codes.include? d "prop:#{d}" end } if codes.first.is_a? Array newc = codes.map{|c| c.map{|el| if el =~ /^http:\/\// "<#{el}>" else el end } } else newc = codes.map{|c| ["#{sanitize(c).first}","code:#{sanitize(c).first.downcase}","code:#{sanitize(c).first.downcase.capitalize}"] } end [newm, newd, newc] end def component_gen(args,options={}) args = Array[args].flatten args.map{|arg| arg.gsub("prop:","cs:").gsub(%r{<#{options[:base_url]}/.+/(\w.+)>$},'cs:'+'\1')} end def encode_data(codes,data,var,options={}) codes = sanitize(codes) new_data = {} data.map{|k,v| if codes.include? k new_data[k] = v.map{|val| if val =~ /^http:\/\// "<#{val}>" elsif val =~ /^[a-zA-z]+:[a-zA-z]+$/ val else "" end } else new_data[k] = v end } new_data end def vocabulary(vocab,options={}) if vocab.is_a?(String) && vocab =~ /^http:\/\// RDF::Vocabulary.new(vocab) elsif RDF.const_defined? 
vocab.to_sym && RDF.const_get(vocab.to_sym).inspect =~ /^RDF::Vocabulary/ RDF.const_get(vocab) else nil end end def generate(measures, dimensions, codes, data, observation_labels, var, options={}) # dimensions = sanitize(dimensions) # codes = sanitize(codes) # measures = sanitize(measures) var = sanitize([var]).first data = sanitize_hash(data) str = prefixes(var,options) str << data_structure_definition(measures, dimensions, codes, var, options) str << dataset(var, options) component_specifications(measures, dimensions, codes, var, options).map{ |c| str << c } dimension_properties(dimensions, codes, var, options).map{|p| str << p} measure_properties(measures, var, options).map{|p| str << p} code_lists(codes, data, var, options).map{|l| str << l} concept_codes(codes, data, var, options).map{|c| str << c} observations(measures, dimensions, codes, data, observation_labels, var, options).map{|o| str << o} str end def prefixes(var, options={}) var = sanitize([var]).first options = defaults().merge(options) base = options[:base_url] <<-EOF.unindent @base <#{base}/ns/dc/> . @prefix ns: <#{base}/ns/dataset/#{var}/> . @prefix qb: . @prefix rdf: . @prefix rdfs: . @prefix prop: <#{base}/dc/properties/> . @prefix dct: . @prefix xsd: . @prefix cs: <#{base}/dc/dataset/#{var}/cs/> . @prefix code: <#{base}/dc/dataset/#{var}/code/> . @prefix owl: . @prefix skos: . @prefix foaf: . @prefix org: . @prefix prov: . 
EOF end def data_structure_definition(measures,dimensions,codes,var,options={}) var = sanitize([var]).first options = defaults().merge(options) rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options) cs_dims = component_gen(rdf_dimensions,options) #rdf_dimensions.map{|d| d.gsub('prop:','cs:')} cs_meas = component_gen(rdf_measures,options) #rdf_measures.map!{|m| m.gsub('prop:','cs:')} str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n" cs_dims.map{|d| str << " qb:component #{d} ;\n" } cs_meas.map{|m| str << " qb:component #{m} ;\n" } str[-2]='.' str<<"\n" str end def dataset(var,options={}) var = sanitize([var]).first options = defaults().merge(options) <<-EOF.unindent ns:dataset-#{var} a qb:DataSet ; rdfs:label "#{var}"@en ; qb:structure ns:dsd-#{var} . EOF end def component_specifications(measure_names, dimension_names, codes, var, options={}) options = defaults().merge(options) rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measure_names, dimension_names, codes, options) cs_dims = component_gen(rdf_dimensions,options) cs_meas = component_gen(rdf_measures,options) # cs_dims = rdf_dimensions.map{|d| d.gsub('prop:','cs:')} # cs_meas = rdf_measures.map{|m| m.gsub('prop:','cs:')} specs = [] rdf_dimensions.each_with_index.map{|d,i| specs << <<-EOF.unindent #{cs_dims[i]} a qb:ComponentSpecification ; rdfs:label "#{strip_prefixes(strip_uri(dimension_names[i]))} Component" ; qb:dimension #{d} . EOF } rdf_measures.each_with_index.map{|n,i| specs << <<-EOF.unindent #{cs_meas[i]} a qb:ComponentSpecification ; rdfs:label "#{strip_prefixes(strip_uri(measure_names[i]))} Component" ; qb:measure #{n} . 
EOF } specs end def dimension_properties(dimensions, codes, var, options={}) options = defaults().merge(options) rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], dimensions, codes, options) props = [] dimension_codes = rdf_codes.map{|c| if c[0]=~/^ ;\n" str << " #{rdf_dimensions[j]} #{to_resource(data[d][i], options)} ;\n" else str << " #{rdf_dimensions[j]} #{to_literal(data[d][i], options)} ;\n" end } measures.each_with_index{|m,j| contains_nulls = contains_nulls | (data[m][i] == nil) str << " #{rdf_measures[j]} #{to_literal(data[m][i], options)} ;\n" } str << " .\n\n" if contains_nulls && !options[:encode_nulls] if options[:raise_nils] raise "missing component for observation, skipping: #{str}, " elsif options[:whiny_nils] puts "missing component for observation, skipping: #{str}, " end else obs << str end } obs end def code_lists(codes, data, var, options={}) options = defaults().merge(options) rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], [], codes, options) data = encode_data(codes, data, var, options) lists = [] rdf_codes.map{|code| if code[0] =~ /^<.+>$/ refcode = code[0][1..-2] else refcode = code[0] end str = <<-EOF.unindent #{code[2]} a rdfs:Class, owl:Class; rdfs:subClassOf skos:Concept ; rdfs:label "Code list for #{strip_prefixes(strip_uri(code[1]))} - codelist class"@en; rdfs:comment "Specifies the #{strip_prefixes(strip_uri(code[1]))} for each observation"; rdfs:seeAlso #{code[1]} . 
#{code[1]} a skos:ConceptScheme; skos:prefLabel "Code list for #{strip_prefixes(strip_uri(code[1]))} - codelist scheme"@en; rdfs:label "Code list for #{strip_prefixes(strip_uri(code[1]))} - codelist scheme"@en; skos:notation "CL_#{strip_prefixes(strip_uri(code[1])).upcase}"; skos:note "Specifies the #{strip_prefixes(strip_uri(code[1]))} for each observation"; EOF data[refcode].uniq.map{|value| unless value == nil && !options[:encode_nulls] str << " skos:hasTopConcept #{to_resource(value,options)} ;\n" end } str << " .\n\n" lists << str } lists end def concept_codes(codes, data, var, options={}) options = defaults().merge(options) rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], [], codes, options) concepts = [] data = encode_data(codes, data, var, options) rdf_codes.map{|code| if code[0] =~ /^<.+>$/ refcode = code[0][1..-2] else refcode = code[0] end # puts data[refcode].uniq data[refcode].uniq.each_with_index{|value,i| unless value == nil && !options[:encode_nulls] concepts << <<-EOF.unindent #{to_resource(value,options)} a skos:Concept, #{code[2]}; skos:topConceptOf #{code[1]} ; skos:prefLabel "#{strip_uri(value)}" ; skos:inScheme #{code[1]} . EOF end } } concepts end def abbreviate_known(turtle_string) #debug method # puts turtle_string turtle_string.gsub(//, 'prop:\1').gsub(//, '').gsub(//, 'code:\2') end end end end