lib/bio-publisci/dataset/data_cube.rb in bio-publisci-0.0.8 vs lib/bio-publisci/dataset/data_cube.rb in bio-publisci-0.1.0

- old
+ new

@@ -11,11 +11,11 @@ include PubliSci::Parser def defaults { type: :dataframe, encode_nulls: false, - base_url: "http://www.rqtl.org", + base_url: "http://onto.strinz.me", } end def generate_resources(measures, dimensions, codes, options={}) newm = measures.map {|m| @@ -30,16 +30,14 @@ newc = [] newd = dimensions.map{|d| if d =~ /^http:\/\// - # newc << "<#{d}>" if codes.include? d "<#{d}>" elsif d =~ /^[a-zA-z]+:[a-zA-z]+$/ d else - # newc << "prop:#{d}" if codes.include? d "prop:#{d}" end } if codes.first.is_a? Array @@ -58,13 +56,14 @@ } end [newm, newd, newc] end - def component_gen(args,options={}) + def component_gen(args,var,options={}) args = Array[args].flatten - args.map{|arg| arg.gsub("prop:","cs:").gsub(%r{<#{options[:base_url]}/.+/(\w.+)>$},'cs:'+'\1')} + args = args.map{|arg| arg.gsub("prop:","cs:").gsub(%r{<#{options[:base_url]}/.+/(\w.+)>$},'cs:'+'\1')} + args.map{|arg| arg.gsub(%r{<http://(.+)>},"<#{options[:base_url]}/dc/dataset/#{var}/cs/"+'\1'+'>')} end def encode_data(codes,data,var,options={}) codes = sanitize(codes) new_data = {} @@ -74,11 +73,11 @@ if val =~ /^http:\/\// "<#{val}>" elsif val =~ /^[a-zA-z]+:[a-zA-z]+$/ val else - "<code/#{k.downcase}/#{val}>" + "<code/#{k.downcase}/#{sanitize(val).first}>" end } else new_data[k] = v end @@ -118,16 +117,16 @@ def prefixes(var, options={}) var = sanitize([var]).first options = defaults().merge(options) base = options[:base_url] <<-EOF.unindent - @base <#{base}/ns/dc/> . - @prefix ns: <#{base}/ns/dataset/#{var}/> . + @base <#{base}/dc/dataset/#{var}/> . + @prefix ns: <#{base}/dc/dataset/#{var}/> . @prefix qb: <http://purl.org/linked-data/cube#> . @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . - @prefix prop: <#{base}/dc/properties/> . + @prefix prop: <#{base}/properties/> . @prefix dct: <http://purl.org/dc/terms/> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . @prefix cs: <#{base}/dc/dataset/#{var}/cs/> . @prefix code: <#{base}/dc/dataset/#{var}/code/> . @prefix owl: <http://www.w3.org/2002/07/owl#> . @@ -141,12 +140,12 @@ def data_structure_definition(measures,dimensions,codes,var,options={}) var = sanitize([var]).first options = defaults().merge(options) rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options) - cs_dims = component_gen(rdf_dimensions,options) #rdf_dimensions.map{|d| d.gsub('prop:','cs:')} - cs_meas = component_gen(rdf_measures,options) #rdf_measures.map!{|m| m.gsub('prop:','cs:')} + cs_dims = component_gen(rdf_dimensions,var,options) #rdf_dimensions.map{|d| d.gsub('prop:','cs:')} + cs_meas = component_gen(rdf_measures,var,options) #rdf_measures.map!{|m| m.gsub('prop:','cs:')} str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n" cs_dims.map{|d| str << " qb:component #{d} ;\n" } @@ -170,29 +169,27 @@ end def component_specifications(measure_names, dimension_names, codes, var, options={}) options = defaults().merge(options) rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measure_names, dimension_names, codes, options) - cs_dims = component_gen(rdf_dimensions,options) - cs_meas = component_gen(rdf_measures,options) - # cs_dims = rdf_dimensions.map{|d| d.gsub('prop:','cs:')} - # cs_meas = rdf_measures.map{|m| m.gsub('prop:','cs:')} + cs_dims = component_gen(rdf_dimensions,var,options) + cs_meas = component_gen(rdf_measures,var,options) specs = [] rdf_dimensions.each_with_index.map{|d,i| specs << <<-EOF.unindent #{cs_dims[i]} a qb:ComponentSpecification ; - rdfs:label "#{strip_prefixes(strip_uri(dimension_names[i]))} Component" ; + rdfs:label "#{strip_prefixes(strip_uri(dimension_names[i]))}" ; qb:dimension #{d} . EOF } rdf_measures.each_with_index.map{|n,i| specs << <<-EOF.unindent #{cs_meas[i]} a qb:ComponentSpecification ; - rdfs:label "#{strip_prefixes(strip_uri(measure_names[i]))} Component" ; + rdfs:label "#{strip_prefixes(strip_uri(measure_names[i]))}" ; qb:measure #{n} . EOF } @@ -212,10 +209,11 @@ end } rdf_dimensions.each_with_index{|d,i| if dimension_codes.include?(dimensions[i]) + code = rdf_codes[dimension_codes.index(dimensions[i])] props << <<-EOF.unindent #{d} a rdf:Property, qb:DimensionProperty ; rdfs:label "#{strip_prefixes(strip_uri(d))}"@en ; qb:codeList #{code[1]} ; @@ -223,13 +221,17 @@ EOF else props << <<-EOF.unindent #{d} a rdf:Property, qb:DimensionProperty ; - rdfs:label "#{strip_prefixes(strip_uri(d))}"@en . - + rdfs:label "#{strip_prefixes(strip_uri(d))}"@en ; EOF + if options[:ranges] && options[:ranges][dimension[i]] + props.last << "\n rdfs:range #{options[:ranges][dimensions[i]]} .\n\n" + else + props.last[-2] = ".\n" + end end } props end @@ -237,27 +239,36 @@ def measure_properties(measures, var, options={}) options = defaults().merge(options) rdf_measures = generate_resources(measures, [], [], options)[0] props = [] - rdf_measures.map{ |m| + rdf_measures.each_with_index{ |m,i| - props << <<-EOF.unindent - #{m} a rdf:Property, qb:MeasureProperty ; - rdfs:label "#{strip_prefixes(strip_uri(m))}"@en . + props << <<-EOF.unindent + #{m} a rdf:Property, qb:MeasureProperty ; + rdfs:label "#{strip_prefixes(strip_uri(m))}"@en ; + EOF - EOF - } + if options[:ranges] && options[:ranges][measures[i]] + props.last << " rdfs:range #{options[:ranges][measures[i]]} .\n\n" + else + props.last[-2] = ".\n" + end + } props end def observations(measures, dimensions, codes, data, observation_labels, var, options={}) var = sanitize([var]).first measures = sanitize(measures) dimensions = sanitize(dimensions) + + data.each{|k,v| data[k]=Array(v)} + observation_labels = Array(observation_labels) options = defaults().merge(options) + rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options) data = encode_data(codes, data, var, options) obs = [] dimension_codes = rdf_codes.map{|c| @@ -267,45 +278,69 @@ c[0] end } observation_labels.each_with_index.map{|r, i| - contains_nulls = false + # contains_nulls = false str = <<-EOF.unindent ns:obs#{r} a qb:Observation ; qb:dataSet ns:dataset-#{var} ; EOF str << " rdfs:label \"#{r}\" ;\n" unless options[:no_labels] + obs_index = 0 + obs_nodes = [] + dimensions.each_with_index{|d,j| - contains_nulls = contains_nulls | (data[d][i] == nil) + contains_nulls = (data[d][i] == nil) - if dimension_codes.include? d - # str << " #{rdf_dimensions[j]} <code/#{d.downcase}/#{data[d][i]}> ;\n" - str << " #{rdf_dimensions[j]} #{to_resource(data[d][i], options)} ;\n" - else - str << " #{rdf_dimensions[j]} #{to_literal(data[d][i], options)} ;\n" + unless contains_nulls && !options[:encode_nulls] + if is_complex?(data[d][i]) + str << " #{rdf_dimensions[j]} #{add_node(obs_index,add_node(r))} ;\n" + obs_nodes << encode_value(data[d][i], options, obs_index, add_node(r)) + else + str << " #{rdf_dimensions[j]} #{encode_value(data[d][i], options)} ;\n" + end end + + obs_index += 1 } measures.each_with_index{|m,j| - contains_nulls = contains_nulls | (data[m][i] == nil) - str << " #{rdf_measures[j]} #{to_literal(data[m][i], options)} ;\n" + contains_nulls = (data[m][i] == nil) + unless contains_nulls && !options[:encode_nulls] + if is_complex?(data[m][i]) + str << " #{rdf_measures[j]} #{add_node(obs_index,add_node(r))} ;\n" + val = encode_value(data[m][i], options, obs_index, add_node(r)) + + if val.last.is_a? Array + unless val.last.last[-2] == "." + val.last.last << ".\n" + end + end + + obs_nodes << val + else + str << " #{rdf_measures[j]} #{encode_value(data[m][i], options)} ;\n" + end + end + + obs_index += 1 } str << " .\n\n" - if contains_nulls && !options[:encode_nulls] - if options[:raise_nils] - raise "missing component for observation, skipping: #{str}, " - elsif options[:whiny_nils] - puts "missing component for observation, skipping: #{str}, " - end - else - obs << str + + if obs_nodes.size > 0 + flatted = obs_nodes.flatten + str << turtle_indent(flatted.join("\n")) + str << " \n\n" end + + obs << str + } obs end def code_lists(codes, data, var, options={}) @@ -354,11 +389,10 @@ if code[0] =~ /^<.+>$/ refcode = code[0][1..-2] else refcode = code[0] end - # puts data[refcode].uniq data[refcode].uniq.each_with_index{|value,i| unless value == nil && !options[:encode_nulls] concepts << <<-EOF.unindent #{to_resource(value,options)} a skos:Concept, #{code[2]}; skos:topConceptOf #{code[1]} ; @@ -375,10 +409,10 @@ def abbreviate_known(turtle_string) #debug method # puts turtle_string - turtle_string.gsub(/<http:\/\/www\.rqtl\.org\/dc\/properties\/(\S+)>/, 'prop:\1').gsub(/<http:\/\/www.rqtl.org\/ns\/dc\/code\/(\S+)\/(\S+)>/, '<code/\1/\2>').gsub(/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\S+)>/, 'code:\2') + turtle_string.gsub(/<http:\/\/www\.rqtl\.org\/dc\/properties\/(\S+)>/, 'prop:\1').gsub(/<http:\/\/www.rqtl.org\/ns\/dc\/code\/(\S+)\/(\S+)>/, '<code/\1/\2>').gsub(/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\w+)>/, 'code:\2').gsub(/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\S+)>/, '<code/' + '\2' +'>') end end end end