lib/bio-publisci/dataset/data_cube.rb in bio-publisci-0.0.3 vs lib/bio-publisci/dataset/data_cube.rb in bio-publisci-0.0.4

- old
+ new

@@ -1,6 +1,6 @@ - #monkey patch to make rdf string w/ heredocs prettier ;) + #monkey patch to make rdf string w/ heredocs prettier ;) class String def unindent gsub /^#{self[/\A\s*/]}/, '' end end @@ -23,11 +23,11 @@ "<#{m}>" elsif m =~ /^[a-zA-z]+:[a-zA-z]+$/ m else "prop:#{m}" - end + end } newc = [] newd = dimensions.map{|d| @@ -52,17 +52,18 @@ end } } else newc = codes.map{|c| - ["#{c}","code:#{c.downcase}","code:#{c.downcase.capitalize}"] + ["#{sanitize(c).first}","code:#{sanitize(c).first.downcase}","code:#{sanitize(c).first.downcase.capitalize}"] } end [newm, newd, newc] end def encode_data(codes,data,var,options={}) + codes = sanitize(codes) new_data = {} data.map{|k,v| if codes.include? k new_data[k] = v.map{|val| if val =~ /^http:\/\// @@ -87,11 +88,11 @@ RDF.const_get(vocab) else nil end end - + def generate(measures, dimensions, codes, data, observation_labels, var, options={}) # dimensions = sanitize(dimensions) # codes = sanitize(codes) # measures = sanitize(measures) var = sanitize([var]).first @@ -152,22 +153,22 @@ end def dataset(var,options={}) var = sanitize([var]).first options = defaults().merge(options) - <<-EOF.unindent + <<-EOF.unindent ns:dataset-#{var} a qb:DataSet ; rdfs:label "#{var}"@en ; qb:structure ns:dsd-#{var} . EOF end def component_specifications(measure_names, dimension_names, var, options={}) options = defaults().merge(options) specs = [] - + dimension_names.map{|d| specs << <<-EOF.unindent cs:#{d} a qb:ComponentSpecification ; rdfs:label "#{d} Component" ; qb:dimension prop:#{d} . @@ -181,23 +182,23 @@ rdfs:label "#{n} Component" ; qb:measure prop:#{n} . EOF } - + specs end def dimension_properties(dimensions, codes, var, options={}) options = defaults().merge(options) rdf_measures, rdf_dimensions, rdf_codes = generate_resources([], dimensions, codes, options) props = [] - dimension_codes = rdf_codes.map{|c| + dimension_codes = rdf_codes.map{|c| if c[0]=~/^<http:/ - c[0][1..-2] - else + c[0][1..-2] + else c[0] end } rdf_dimensions.each_with_index{|d,i| @@ -216,55 +217,57 @@ rdfs:label "#{strip_prefixes(strip_uri(d))}"@en . EOF end } - + props end def measure_properties(measures, var, options={}) options = defaults().merge(options) rdf_measures = generate_resources(measures, [], [], options)[0] props = [] - + rdf_measures.map{ |m| - + props << <<-EOF.unindent #{m} a rdf:Property, qb:MeasureProperty ; rdfs:label "#{strip_prefixes(strip_uri(m))}"@en . EOF } - + props end - def observations(measures, dimensions, codes, data, observation_labels, var, options={}) + def observations(measures, dimensions, codes, data, observation_labels, var, options={}) var = sanitize([var]).first + measures = sanitize(measures) + dimensions = sanitize(dimensions) options = defaults().merge(options) rdf_measures, rdf_dimensions, rdf_codes = generate_resources(measures, dimensions, codes, options) data = encode_data(codes, data, var, options) obs = [] - - dimension_codes = rdf_codes.map{|c| + + dimension_codes = rdf_codes.map{|c| if c[0]=~/^<http:/ - c[0][1..-2] - else + c[0][1..-2] + else c[0] end } observation_labels.each_with_index.map{|r, i| contains_nulls = false - str = <<-EOF.unindent + str = <<-EOF.unindent ns:obs#{r} a qb:Observation ; qb:dataSet ns:dataset-#{var} ; EOF str << " rdfs:label \"#{r}\" ;\n" unless options[:no_labels] - + dimensions.each_with_index{|d,j| contains_nulls = contains_nulls | (data[d][i] == nil) if dimension_codes.include? d # str << " #{rdf_dimensions[j]} <code/#{d.downcase}/#{data[d][i]}> ;\n" @@ -274,23 +277,23 @@ end } measures.each_with_index{|m,j| contains_nulls = contains_nulls | (data[m][i] == nil) - str << " #{rdf_measures[j]} #{to_literal(data[m][i], options)} ;\n" - + str << " #{rdf_measures[j]} #{to_literal(data[m][i], options)} ;\n" + } str << " .\n\n" if contains_nulls && !options[:encode_nulls] if options[:raise_nils] raise "missing component for observation, skipping: #{str}, " elsif options[:whiny_nils] puts "missing component for observation, skipping: #{str}, " end else - obs << str + obs << str end } obs end @@ -321,16 +324,16 @@ data[refcode].uniq.map{|value| unless value == nil && !options[:encode_nulls] str << " skos:hasTopConcept #{to_resource(value,options)} ;\n" end } - + str << " .\n\n" lists << str } - + lists end def concept_codes(codes, data, var, options={}) options = defaults().merge(options) @@ -360,9 +363,10 @@ end def abbreviate_known(turtle_string) #debug method + # puts turtle_string turtle_string.gsub(/<http:\/\/www\.rqtl\.org\/dc\/properties\/(\S+)>/, 'prop:\1').gsub(/<http:\/\/www.rqtl.org\/ns\/dc\/code\/(\S+)\/(\S+)>/, '<code/\1/\2>').gsub(/<http:\/\/www.rqtl.org\/dc\/dataset\/(\S+)\/code\/(\S+)>/, 'code:\2') end end end end