lib/bio-publisci/dataset/data_cube.rb in bio-publisci-0.0.3 vs lib/bio-publisci/dataset/data_cube.rb in bio-publisci-0.0.4
- old
+ new
@@ -1,6 +1,6 @@
- #monkey patch to make rdf string w/ heredocs prettier ;)
+ #monkey patch to make rdf string w/ heredocs prettier ;)
class String
  # Strips the leading whitespace found at the very start of the string
  # from the beginning of every line that starts with that same prefix.
  # Lines that do not begin with the prefix are left as they are.
  # Returns a new string; the receiver is not modified.
  def unindent
    prefix = slice(/\A\s*/)
    gsub(/^#{prefix}/, '')
  end
end
@@ -23,11 +23,11 @@
"<#{m}>"
elsif m =~ /^[a-zA-z]+:[a-zA-z]+$/
m
else
"prop:#{m}"
- end
+ end
}
newc = []
newd = dimensions.map{|d|
@@ -52,17 +52,18 @@
end
}
}
else
newc = codes.map{|c|
- ["#{c}","code:#{c.downcase}","code:#{c.downcase.capitalize}"]
+ ["#{sanitize(c).first}","code:#{sanitize(c).first.downcase}","code:#{sanitize(c).first.downcase.capitalize}"]
}
end
[newm, newd, newc]
end
def encode_data(codes,data,var,options={})
+ codes = sanitize(codes)
new_data = {}
data.map{|k,v|
if codes.include? k
new_data[k] = v.map{|val|
if val =~ /^http:\/\//
@@ -87,11 +88,11 @@
RDF.const_get(vocab)
else
nil
end
end
-
+
def generate(measures, dimensions, codes, data, observation_labels, var, options={})
# dimensions = sanitize(dimensions)
# codes = sanitize(codes)
# measures = sanitize(measures)
var = sanitize([var]).first
@@ -152,22 +153,22 @@
end
def dataset(var,options={})
var = sanitize([var]).first
options = defaults().merge(options)
- <<-EOF.unindent
+ <<-EOF.unindent
ns:dataset-#{var} a qb:DataSet ;
rdfs:label "#{var}"@en ;
qb:structure ns:dsd-#{var} .
EOF
end
def component_specifications(measure_names, dimension_names, var, options={})
options = defaults().merge(options)
specs = []
-
+
dimension_names.map{|d|
specs << <<-EOF.unindent
cs:#{d} a qb:ComponentSpecification ;
rdfs:label "#{d} Component" ;
qb:dimension prop:#{d} .
@@ -181,23 +182,23 @@
rdfs:label "#{n} Component" ;
qb:measure prop:#{n} .
EOF
}
-
+
specs
end
def dimension_properties(dimensions, codes, var, options={})
options = defaults().merge(options)
rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], dimensions, codes, options)
props = []
- dimension_codes = rdf_codes.map{|c|
+ dimension_codes = rdf_codes.map{|c|
if c[0]=~/^<http:/
- c[0][1..-2]
- else
+ c[0][1..-2]
+ else
c[0]
end
}
rdf_dimensions.each_with_index{|d,i|
@@ -216,55 +217,57 @@
rdfs:label "#{strip_prefixes(strip_uri(d))}"@en .
EOF
end
}
-
+
props
end
def measure_properties(measures, var, options={})
options = defaults().merge(options)
rdf_measures = generate_resources(measures, [], [], options)[0]
props = []
-
+
rdf_measures.map{ |m|
-
+
props << <<-EOF.unindent
#{m} a rdf:Property, qb:MeasureProperty ;
rdfs:label "#{strip_prefixes(strip_uri(m))}"@en .
EOF
}
-
+
props
end
- def observations(measures, dimensions, codes, data, observation_labels, var, options={})
+ def observations(measures, dimensions, codes, data, observation_labels, var, options={})
var = sanitize([var]).first
+ measures = sanitize(measures)
+ dimensions = sanitize(dimensions)
options = defaults().merge(options)
rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options)
data = encode_data(codes, data, var, options)
obs = []
-
- dimension_codes = rdf_codes.map{|c|
+
+ dimension_codes = rdf_codes.map{|c|
if c[0]=~/^<http:/
- c[0][1..-2]
- else
+ c[0][1..-2]
+ else
c[0]
end
}
observation_labels.each_with_index.map{|r, i|
contains_nulls = false
- str = <<-EOF.unindent
+ str = <<-EOF.unindent
ns:obs#{r} a qb:Observation ;
qb:dataSet ns:dataset-#{var} ;
EOF
str << " rdfs:label \"#{r}\" ;\n" unless options[:no_labels]
-
+
dimensions.each_with_index{|d,j|
contains_nulls = contains_nulls | (data[d][i] == nil)
if dimension_codes.include? d
# str << " #{rdf_dimensions[j]} <code/#{d.downcase}/#{data[d][i]}> ;\n"
@@ -274,23 +277,23 @@
end
}
measures.each_with_index{|m,j|
contains_nulls = contains_nulls | (data[m][i] == nil)
- str << " #{rdf_measures[j]} #{to_literal(data[m][i], options)} ;\n"
-
+ str << " #{rdf_measures[j]} #{to_literal(data[m][i], options)} ;\n"
+
}
str << " .\n\n"
if contains_nulls && !options[:encode_nulls]
if options[:raise_nils]
raise "missing component for observation, skipping: #{str}, "
elsif options[:whiny_nils]
puts "missing component for observation, skipping: #{str}, "
end
else
- obs << str
+ obs << str
end
}
obs
end
@@ -321,16 +324,16 @@
data[refcode].uniq.map{|value|
unless value == nil && !options[:encode_nulls]
str << " skos:hasTopConcept #{to_resource(value,options)} ;\n"
end
}
-
+
str << " .\n\n"
lists << str
}
-
+
lists
end
def concept_codes(codes, data, var, options={})
options = defaults().merge(options)
@@ -360,9 +363,10 @@
end
def abbreviate_known(turtle_string)
#debug method
# Shortens three hard-coded www.rqtl.org URI forms found in turtle_string
# using chained gsub calls, and returns the resulting string:
#   <http://www.rqtl.org/dc/properties/X>      -> prop:X
#   <http://www.rqtl.org/ns/dc/code/X/Y>       -> <code/X/Y>
#   <http://www.rqtl.org/dc/dataset/X/code/Y>  -> code:Y  (the dataset segment X is dropped)
# X and Y are runs of non-whitespace characters (\S+).
# NOTE(review): the dots in "www.rqtl.org" are escaped only in the first
# pattern; in the second and third they match any character.
+ # puts turtle_string
turtle_string.gsub(/<http:\/\/www\.rqtl\.org\/dc\/properties\/(\S+)>/, 'prop:\1').gsub(/<http:\/\/www.rqtl.org\/ns\/dc\/code\/(\S+)\/(\S+)>/, '<code/\1/\2>').gsub(/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\S+)>/, 'code:\2')
end
end
end
end