lib/bio-publisci/dataset/data_cube.rb in bio-publisci-0.0.8 vs lib/bio-publisci/dataset/data_cube.rb in bio-publisci-0.1.0
- old
+ new
@@ -11,11 +11,11 @@
include PubliSci::Parser
# Returns the hash of default generator options. Other methods in this file
# combine it with caller-supplied options via `defaults().merge(options)`,
# so any key passed by the caller overrides the value given here.
#
# Keys visible here:
#   :type         - :dataframe
#   :encode_nulls - false
#   :base_url     - base URI used when building prefixes and resource URIs;
#                   this diff changes the default from "http://www.rqtl.org"
#                   to "http://onto.strinz.me".
#
# NOTE(review): abbreviate_known in this file still matches only
# "http://www.rqtl.org/..." URIs, so output built from the new default
# :base_url will not be abbreviated by it - confirm that is intended.
def defaults
{
type: :dataframe,
encode_nulls: false,
- base_url: "http://www.rqtl.org",
+ base_url: "http://onto.strinz.me",
}
end
def generate_resources(measures, dimensions, codes, options={})
newm = measures.map {|m|
@@ -30,16 +30,14 @@
newc = []
newd = dimensions.map{|d|
if d =~ /^http:\/\//
- # newc << "<#{d}>" if codes.include? d
"<#{d}>"
elsif d =~ /^[a-zA-z]+:[a-zA-z]+$/
d
else
- # newc << "prop:#{d}" if codes.include? d
"prop:#{d}"
end
}
if codes.first.is_a? Array
@@ -58,13 +56,14 @@
}
end
[newm, newd, newc]
end
# Converts property resource strings into component-specification names.
#
# args    - a single string or a (possibly nested) array of strings;
#           `Array[args].flatten` normalises both forms to a flat array.
# var     - (new version only) dataset name interpolated into the rewritten
#           URI in the final step.
# options - hash; only options[:base_url] is read here.
#
# Steps applied to every element:
#   1. every "prop:" substring is replaced with "cs:";
#   2. a trailing "<BASE_URL/.../NAME>" is replaced with "cs:NAME";
#   3. (new version only) any remaining "<http://REST>" is rewritten to
#      "<BASE_URL/dc/dataset/VAR/cs/REST>", where REST is everything captured
#      after "http://" (host and path included).
# Returns the mapped array.
#
# NOTE(review): the new signature inserts `var` as a required positional
# argument before `options`, so existing two-argument callers must be updated;
# the call sites visible in this diff pass (resources, var, options).
# NOTE(review): options[:base_url] is interpolated into the regex unescaped,
# so its "." characters match any character; Regexp.escape would make the
# match literal - confirm whether that matters for the URLs in use.
- def component_gen(args,options={})
+ def component_gen(args,var,options={})
args = Array[args].flatten
- args.map{|arg| arg.gsub("prop:","cs:").gsub(%r{<#{options[:base_url]}/.+/(\w.+)>$},'cs:'+'\1')}
+ args = args.map{|arg| arg.gsub("prop:","cs:").gsub(%r{<#{options[:base_url]}/.+/(\w.+)>$},'cs:'+'\1')}
+ args.map{|arg| arg.gsub(%r{<http://(.+)>},"<#{options[:base_url]}/dc/dataset/#{var}/cs/"+'\1'+'>')}
end
def encode_data(codes,data,var,options={})
codes = sanitize(codes)
new_data = {}
@@ -74,11 +73,11 @@
if val =~ /^http:\/\//
"<#{val}>"
elsif val =~ /^[a-zA-z]+:[a-zA-z]+$/
val
else
- "<code/#{k.downcase}/#{val}>"
+ "<code/#{k.downcase}/#{sanitize(val).first}>"
end
}
else
new_data[k] = v
end
@@ -118,16 +117,16 @@
def prefixes(var, options={})
var = sanitize([var]).first
options = defaults().merge(options)
base = options[:base_url]
<<-EOF.unindent
- @base <#{base}/ns/dc/> .
- @prefix ns: <#{base}/ns/dataset/#{var}/> .
+ @base <#{base}/dc/dataset/#{var}/> .
+ @prefix ns: <#{base}/dc/dataset/#{var}/> .
@prefix qb: <http://purl.org/linked-data/cube#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
- @prefix prop: <#{base}/dc/properties/> .
+ @prefix prop: <#{base}/properties/> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix cs: <#{base}/dc/dataset/#{var}/cs/> .
@prefix code: <#{base}/dc/dataset/#{var}/code/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@@ -141,12 +140,12 @@
def data_structure_definition(measures,dimensions,codes,var,options={})
var = sanitize([var]).first
options = defaults().merge(options)
rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
- cs_dims = component_gen(rdf_dimensions,options) #rdf_dimensions.map{|d| d.gsub('prop:','cs:')}
- cs_meas = component_gen(rdf_measures,options) #rdf_measures.map!{|m| m.gsub('prop:','cs:')}
+ cs_dims = component_gen(rdf_dimensions,var,options) #rdf_dimensions.map{|d| d.gsub('prop:','cs:')}
+ cs_meas = component_gen(rdf_measures,var,options) #rdf_measures.map!{|m| m.gsub('prop:','cs:')}
str = "ns:dsd-#{var} a qb:DataStructureDefinition;\n"
cs_dims.map{|d|
str << " qb:component #{d} ;\n"
}
@@ -170,29 +169,27 @@
end
def component_specifications(measure_names, dimension_names, codes, var, options={})
options = defaults().merge(options)
rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measure_names, dimension_names, codes, options)
- cs_dims = component_gen(rdf_dimensions,options)
- cs_meas = component_gen(rdf_measures,options)
- # cs_dims = rdf_dimensions.map{|d| d.gsub('prop:','cs:')}
- # cs_meas = rdf_measures.map{|m| m.gsub('prop:','cs:')}
+ cs_dims = component_gen(rdf_dimensions,var,options)
+ cs_meas = component_gen(rdf_measures,var,options)
specs = []
rdf_dimensions.each_with_index.map{|d,i|
specs << <<-EOF.unindent
#{cs_dims[i]} a qb:ComponentSpecification ;
- rdfs:label "#{strip_prefixes(strip_uri(dimension_names[i]))} Component" ;
+ rdfs:label "#{strip_prefixes(strip_uri(dimension_names[i]))}" ;
qb:dimension #{d} .
EOF
}
rdf_measures.each_with_index.map{|n,i|
specs << <<-EOF.unindent
#{cs_meas[i]} a qb:ComponentSpecification ;
- rdfs:label "#{strip_prefixes(strip_uri(measure_names[i]))} Component" ;
+ rdfs:label "#{strip_prefixes(strip_uri(measure_names[i]))}" ;
qb:measure #{n} .
EOF
}
@@ -212,10 +209,11 @@
end
}
rdf_dimensions.each_with_index{|d,i|
if dimension_codes.include?(dimensions[i])
+
code = rdf_codes[dimension_codes.index(dimensions[i])]
props << <<-EOF.unindent
#{d} a rdf:Property, qb:DimensionProperty ;
rdfs:label "#{strip_prefixes(strip_uri(d))}"@en ;
qb:codeList #{code[1]} ;
@@ -223,13 +221,17 @@
EOF
else
props << <<-EOF.unindent
#{d} a rdf:Property, qb:DimensionProperty ;
- rdfs:label "#{strip_prefixes(strip_uri(d))}"@en .
-
+ rdfs:label "#{strip_prefixes(strip_uri(d))}"@en ;
EOF
+ if options[:ranges] && options[:ranges][dimension[i]]
+ props.last << "\n rdfs:range #{options[:ranges][dimensions[i]]} .\n\n"
+ else
+ props.last[-2] = ".\n"
+ end
end
}
props
end
@@ -237,27 +239,36 @@
# Builds one Turtle snippet per measure, declaring it as
# `rdf:Property, qb:MeasureProperty` with an English `rdfs:label` derived from
# strip_prefixes(strip_uri(resource)).
#
# measures - measure names, passed to generate_resources.
# var      - not referenced in the body shown here.
# options  - merged over defaults(); the new version also reads
#            options[:ranges].
#
# Returns the array of generated snippets (`props`).
#
# Change in this diff: the label line now ends with ";" instead of ".".
# After each snippet is pushed, the new code either appends an `rdfs:range`
# line taken from options[:ranges][measures[i]] (the lookup key is the
# original measure name, not the generated resource string), or overwrites
# the character at index -2 of the snippet with ".\n" to terminate it.
#
# NOTE(review): `props.last[-2] = ".\n"` presumably targets the trailing ";"
# of the heredoc text; that depends on the exact string `unindent` returns,
# which is not visible here - confirm.
def measure_properties(measures, var, options={})
options = defaults().merge(options)
# generate_resources returns [measures, dimensions, codes]; only the
# measure resources (index 0) are needed here.
rdf_measures = generate_resources(measures, [], [], options)[0]
props = []
- rdf_measures.map{ |m|
+ rdf_measures.each_with_index{ |m,i|
- props << <<-EOF.unindent
- #{m} a rdf:Property, qb:MeasureProperty ;
- rdfs:label "#{strip_prefixes(strip_uri(m))}"@en .
+ props << <<-EOF.unindent
+ #{m} a rdf:Property, qb:MeasureProperty ;
+ rdfs:label "#{strip_prefixes(strip_uri(m))}"@en ;
+ EOF
- EOF
- }
+ if options[:ranges] && options[:ranges][measures[i]]
+ props.last << " rdfs:range #{options[:ranges][measures[i]]} .\n\n"
+ else
+ props.last[-2] = ".\n"
+ end
+ }
props
end
def observations(measures, dimensions, codes, data, observation_labels, var, options={})
var = sanitize([var]).first
measures = sanitize(measures)
dimensions = sanitize(dimensions)
+
+ data.each{|k,v| data[k]=Array(v)}
+ observation_labels = Array(observation_labels)
options = defaults().merge(options)
+
rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
data = encode_data(codes, data, var, options)
obs = []
dimension_codes = rdf_codes.map{|c|
@@ -267,45 +278,69 @@
c[0]
end
}
observation_labels.each_with_index.map{|r, i|
- contains_nulls = false
+ # contains_nulls = false
str = <<-EOF.unindent
ns:obs#{r} a qb:Observation ;
qb:dataSet ns:dataset-#{var} ;
EOF
str << " rdfs:label \"#{r}\" ;\n" unless options[:no_labels]
+ obs_index = 0
+ obs_nodes = []
+
dimensions.each_with_index{|d,j|
- contains_nulls = contains_nulls | (data[d][i] == nil)
+ contains_nulls = (data[d][i] == nil)
- if dimension_codes.include? d
- # str << " #{rdf_dimensions[j]} <code/#{d.downcase}/#{data[d][i]}> ;\n"
- str << " #{rdf_dimensions[j]} #{to_resource(data[d][i], options)} ;\n"
- else
- str << " #{rdf_dimensions[j]} #{to_literal(data[d][i], options)} ;\n"
+ unless contains_nulls && !options[:encode_nulls]
+ if is_complex?(data[d][i])
+ str << " #{rdf_dimensions[j]} #{add_node(obs_index,add_node(r))} ;\n"
+ obs_nodes << encode_value(data[d][i], options, obs_index, add_node(r))
+ else
+ str << " #{rdf_dimensions[j]} #{encode_value(data[d][i], options)} ;\n"
+ end
end
+
+ obs_index += 1
}
measures.each_with_index{|m,j|
- contains_nulls = contains_nulls | (data[m][i] == nil)
- str << " #{rdf_measures[j]} #{to_literal(data[m][i], options)} ;\n"
+ contains_nulls = (data[m][i] == nil)
+ unless contains_nulls && !options[:encode_nulls]
+ if is_complex?(data[m][i])
+ str << " #{rdf_measures[j]} #{add_node(obs_index,add_node(r))} ;\n"
+ val = encode_value(data[m][i], options, obs_index, add_node(r))
+
+ if val.last.is_a? Array
+ unless val.last.last[-2] == "."
+ val.last.last << ".\n"
+ end
+ end
+
+ obs_nodes << val
+ else
+ str << " #{rdf_measures[j]} #{encode_value(data[m][i], options)} ;\n"
+ end
+ end
+
+ obs_index += 1
}
str << " .\n\n"
- if contains_nulls && !options[:encode_nulls]
- if options[:raise_nils]
- raise "missing component for observation, skipping: #{str}, "
- elsif options[:whiny_nils]
- puts "missing component for observation, skipping: #{str}, "
- end
- else
- obs << str
+
+ if obs_nodes.size > 0
+ flatted = obs_nodes.flatten
+ str << turtle_indent(flatted.join("\n"))
+ str << " \n\n"
end
+
+ obs << str
+
}
obs
end
def code_lists(codes, data, var, options={})
@@ -354,11 +389,10 @@
if code[0] =~ /^<.+>$/
refcode = code[0][1..-2]
else
refcode = code[0]
end
- # puts data[refcode].uniq
data[refcode].uniq.each_with_index{|value,i|
unless value == nil && !options[:encode_nulls]
concepts << <<-EOF.unindent
#{to_resource(value,options)} a skos:Concept, #{code[2]};
skos:topConceptOf #{code[1]} ;
@@ -375,10 +409,10 @@
# Debug helper: returns a copy of turtle_string in which full
# "http://www.rqtl.org" URIs are replaced by shorter forms. The substitutions
# are chained with String#gsub, so the argument itself is not modified.
#
# Substitutions, in order:
#   <http://www.rqtl.org/dc/properties/X>        -> prop:X
#   <http://www.rqtl.org/ns/dc/code/A/B>         -> <code/A/B>
#   <http://www.rqtl.org/dc/dataset/D/code/X>    -> code:X
#       (the new version applies this only when X matches \w+)
#   <http://www.rqtl.org/dc/dataset/D/code/X>    -> <code/X>
#       (new version only; catches the code URIs the \w+ pattern skipped)
#
# NOTE(review): the host is hard-coded as www.rqtl.org, while this diff
# changes the default :base_url to "http://onto.strinz.me"; URIs generated
# from the new default will pass through unabbreviated - confirm intent.
# NOTE(review): only the first pattern escapes the dots in "www.rqtl.org";
# in the other patterns "." matches any character.
def abbreviate_known(turtle_string)
#debug method
# puts turtle_string
- turtle_string.gsub(/<http:\/\/www\.rqtl\.org\/dc\/properties\/(\S+)>/, 'prop:\1').gsub(/<http:\/\/www.rqtl.org\/ns\/dc\/code\/(\S+)\/(\S+)>/, '<code/\1/\2>').gsub(/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\S+)>/, 'code:\2')
+ turtle_string.gsub(/<http:\/\/www\.rqtl\.org\/dc\/properties\/(\S+)>/, 'prop:\1').gsub(/<http:\/\/www.rqtl.org\/ns\/dc\/code\/(\S+)\/(\S+)>/, '<code/\1/\2>').gsub(/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\w+)>/, 'code:\2').gsub(/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\S+)>/, '<code/' + '\2' +'>')
end
end
end
end