class String
  # Removes the leading whitespace of the first line from the start of every
  # line, so heredocs can be indented along with the surrounding code.
  #
  # @return [String] a new string with the common prefix stripped
  def unindent
    gsub(/^#{self[/\A\s*/]}/, '')
  end
end

module PubliSci
  class Metadata
    # Builds Turtle snippets (as plain strings) describing a dataset and its
    # provenance. Prefixes such as ns:, prov:, dct:, foaf:, org: and rdfs: are
    # used but not declared here.
    module Generator
      include PubliSci::Parser

      # @return [Hash] default generator options
      def defaults
        {
          encode_nulls: false,
          base_url: "http://www.rqtl.org",
        }
      end

      # Generates the basic descriptive metadata for a dataset.
      #
      # Fills in fields[:var] (sanitized), fields[:creator] (from the USER or
      # USERNAME environment variable) and fields[:date] (today) on the hash
      # that is passed in.
      #
      # @param fields [Hash] :var, :title, :creator, :description, :date,
      #   :subject (list of resources/literals) and :publishers (list of
      #   hashes with :uri and :label)
      # @return [String] the dataset statement followed by publisher statements
      # @raise [RuntimeError] if a publisher lacks :uri or :label
      def basic(fields)
        #TODO don't assume base dataset is "ns:dataset-var",
        #make it just "var", and try to make that clear to calling classes

        # NOTE(review): sanitize is not defined in this file; presumably it is
        # supplied by PubliSci::Parser.
        fields[:var] = sanitize([fields[:var]]).first

        unless fields[:creator]
          creator = ENV['USER'] || ENV['USERNAME']
          fields[:creator] = creator if creator
        end
        fields[:date] ||= Time.now.strftime("%Y-%m-%d")

        #TODO some of these should probably be resources, eg dct:creator, or put under DC namespace
        # Every predicate ends with ';' so optional predicates can be appended;
        # the statement is closed once, after all of them have been added.
        str = <<-EOF.unindent
        ns:dataset-#{fields[:var]} rdfs:label "#{fields[:title]}";
          dct:title "#{fields[:title]}";
          dct:creator "#{fields[:creator]}";
          rdfs:comment "#{fields[:description]}";
          dct:description "#{fields[:description]}";
          dct:issued "#{fields[:date]}"^^xsd:date;
        EOF

        subjects = Array(fields[:subject])
        unless subjects.empty?
          objects = subjects.map do |subject|
            # Fall back to a literal when the subject is not a valid resource.
            term = RDF::Resource(subject)
            term = RDF::Literal(subject) unless term.valid?
            term.to_base
          end
          str << "  dct:subject " << objects.join(",\n    ") << ";\n"
        end

        end_str = ""
        (fields[:publishers] || []).each do |publisher|
          raise "No URI for publisher #{publisher}" unless publisher[:uri]
          raise "No label for publisher #{publisher}" unless publisher[:label]

          str << "  dct:publisher <#{publisher[:uri]}> ;\n"
          end_str << "<#{publisher[:uri]}> a org:Organization, foaf:Agent;\n" \
                     "  rdfs:label \"#{publisher[:label]}\" .\n\n"
        end

        # Close the dataset statement: swap the final ';' for '.'.
        str.sub!(/;\s*\z/, ".\n")

        str + "\n" + end_str
      end

      # Generates provenance statements for an original resource, the
      # triplified output derived from it, and an optional chain of earlier
      # sources.
      #
      # @param original [Hash] requires :resource; optional :software,
      #   :process (text or path to an existing file), :author, :author_name,
      #   :organization, :organization_name
      # @param triplified [Hash, nil] :resource plus the same optional
      #   author/organization keys
      # @param chain [Array<Hash>, nil] earlier sources; each entry is passed
      #   to #activity as its options
      # @param options [Hash] currently unused
      # @return [String]
      # @raise [RuntimeError] if original or original[:resource] is missing
      def provenance(original, triplified, chain, options={})
        #TODO: should either add a prefixes method or replace some with full URIs
        raise "MissingOriginal: must specify a provenance source" unless original && original[:resource]

        #TODO include file type etc, or create a separate method for it
        str = <<-EOF.unindent
        <#{original[:resource]}> a prov:Entity ;
          prov:wasGeneratedBy ns:activity-1 .

        ns:activity-1 a prov:Activity ;
          prov:generated <#{original[:resource]}> .

        EOF

        if original[:software]
          original_assoc_id = Time.now.nsec.to_s(32)
          str << <<-EOF.unindent
          <#{original[:software]}> a prov:Entity.
          ns:activity-1 prov:qualifiedAssociation ns:assoc-1_#{original_assoc_id} .
          ns:assoc-1_#{original_assoc_id} a prov:Association ;
            prov:entity <#{original[:software]}> .

          EOF

          if original[:process]
            steps = quoted_process_steps(original[:process])
            str << <<-EOF.unindent
            ns:assoc-1_#{original_assoc_id} prov:hadPlan ns:plan-1.
            ns:plan-1 a prov:Plan ;
              rdfs:comment (#{steps}) .

            EOF
          end
        end

        str << agent_statements(original, "ns:activity-1") if original[:author]

        if triplified
          triples_assoc_id = Time.now.nsec.to_s(32)
          # NOTE(review): in the text below the SoftwareAgent statement has no
          # subject and `prov:entity` has no object, so the emitted Turtle is
          # incomplete. The agent IRI appears to be missing from this file and
          # should be restored from the authoritative source.
          str << <<-EOF.unindent
          <#{triplified[:resource]}> a prov:Entity;
            prov:wasGeneratedBy ns:activity-0 .

          a prov:Agent, prov:SoftwareAgent ;
            rdfs:label "Semantic Publishing Toolkit" .

          ns:activity-0 a prov:Activity ;
            prov:qualifiedAssociation ns:assoc-0_#{triples_assoc_id};
            prov:generated <#{triplified[:resource]}> ;
            prov:used <#{original[:resource]}> .

          ns:assoc-0_#{triples_assoc_id} a prov:Association ;
            prov:entity ;
            prov:hadPlan ns:plan-0.

          ns:plan-0 a prov:Plan ;
            rdfs:comment "generation of <#{triplified[:resource]}> by R2RDF gem" .

          EOF

          str << agent_statements(triplified) if triplified[:author]
        end

        # An empty chain has no first element to derive from, so skip it.
        if chain && !chain.empty?
          str << "ns:activity-1 prov:used <#{chain.first[:resource]}> .\n"
          str << "<#{original[:resource]}> prov:wasDerivedFrom <#{chain.first[:resource]}> .\n\n"

          chain.each_with_index do |src, i|
            # Each source is derived from the next one; the last has no parent.
            parent = chain[i + 1]
            str << activity(src[:resource], parent && parent[:resource], src)
          end
        end

        str
      end

      # Generates an entity plus the activity (and qualified association)
      # that produced it.
      #
      # @param entity [String] resource that was generated
      # @param used [String, nil] resource the entity was derived from
      # @param options [Hash] optional :software, :process (text or path to an
      #   existing file), :author, :author_name, :organization,
      #   :organization_name
      # @return [String] entity statements followed by activity statements
      # @raise [RuntimeError] if entity is nil/false
      def activity(entity, used, options={})
        raise "NoEntityGiven: activity generation requires a subject entity" unless entity

        # Identifiers are derived from the clock's nanosecond field.
        assoc_id = Time.now.nsec.to_s(32)
        activity_id = Time.now.nsec.to_s(32)
        plan_id = Time.now.nsec.to_s(32)

        entity_str = <<-EOF.unindent
        <#{entity}> a prov:Entity ;
          prov:wasGeneratedBy ns:activity-a_#{activity_id} ;
        EOF

        activity_str = <<-EOF.unindent
        ns:activity-a_#{activity_id} a prov:Activity ;
          prov:generated <#{entity}> ;
        EOF

        if used
          entity_str << "\tprov:wasDerivedFrom <#{used}> . \n\n"
          activity_str << "\tprov:used <#{used}> . \n\n"
        else
          # Nothing more to add: turn the trailing ';' into a terminating '.'.
          entity_str[-2] = ".\n"
          activity_str[-2] = ".\n"
        end

        activity_str << <<-EOF.unindent
        ns:activity-a_#{activity_id} prov:qualifiedAssociation ns:assoc-s_#{assoc_id} .

        ns:assoc-s_#{assoc_id} a prov:Association .

        EOF

        if options[:software]
          activity_str << <<-EOF.unindent
          <#{options[:software]}> a prov:Entity .
          ns:assoc-s_#{assoc_id} prov:agent <#{options[:software]}> .

          EOF

          if options[:process]
            steps = quoted_process_steps(options[:process])
            activity_str << <<-EOF.unindent
            ns:assoc-s_#{assoc_id} prov:hadPlan ns:plan-p_#{plan_id}.
            ns:plan-p_#{plan_id} a prov:Plan ;
              rdfs:comment (#{steps}) .

            EOF
          end
        end

        if options[:author]
          entity_str << "<#{options[:author]}> a prov:Agent, prov:Person .\n"
          entity_str << "<#{options[:author]}> foaf:givenName \"#{options[:author_name]}\" .\n" if options[:author_name]

          activity_str << "ns:activity-a_#{activity_id} prov:wasAssociatedWith <#{options[:author]}> .\n"
          activity_str << "ns:assoc-s_#{assoc_id} prov:agent <#{options[:author]}> .\n"

          if options[:organization]
            entity_str << "<#{options[:organization]}> a prov:Agent, prov:Organization .\n"
            activity_str << "<#{options[:author]}> prov:actedOnBehalfOf <#{options[:organization]}> .\n\n"
            if options[:organization_name]
              entity_str << "<#{options[:organization]}> foaf:name \"#{options[:organization_name]}\" .\n\n"
            end
          else
            activity_str << "\n"
          end
        end

        entity_str + "\n" + activity_str
      end

      # Generates an activity, its qualified association and its plan.
      #
      # @param id [#to_s] suffix used for the activity, association and plan
      # @param step_string [String] newline-separated list of steps
      # @param software_resource [String] IRI of the software entity
      # @param software_var [Object] not referenced by the generated text
      # @param options [Hash] currently unused
      # @return [String]
      def process(id, step_string, software_resource, software_var, options={})
        #TODO a better predicate for the steplist than rdfs:comment
        # and make sure it looks good.
        steps = quote_steps(step_string)
        # The same node name is used where the association is referenced and
        # where it is declared, so the activity links to a described node.
        assoc = "ns:assoc-#{id}_#{Time.now.nsec.to_s(32)}"

        # NOTE(review): `prov:used` has no object in the text below and
        # software_var is never referenced — the object (possibly built from
        # software_var) appears to be missing from this file; confirm upstream.
        <<-EOF.unindent
        ns:activity-#{id} a prov:Activity ;
          prov:qualifiedAssociation #{assoc} ;
          prov:used .

        #{assoc} a prov:Association ;
          prov:entity <#{software_resource}>;
          prov:hadPlan ns:plan-#{id}.

        ns:plan-#{id} a prov:Plan ;
          rdfs:comment (#{steps}) .
        EOF
      end

      # Describes the R2RDF toolkit as a foaf:Agent.
      #
      # @param options [Hash] must provide :base_url; defaults to #defaults
      # @return [String]
      def r2rdf_metadata(options = defaults)
        # NOTE(review): `org:memberOf` has no object in the text below and the
        # statement is not terminated; the organization IRI appears to be
        # missing from this file.
        <<-EOF.unindent
        <#{options[:base_url]}/ns/R2RDF> a foaf:Agent;
          foaf:name "R2RDF Semantic Web Toolkit";
          org:memberOf
        EOF
      end

      # Describes the SciRuby organization.
      #
      # @return [String]
      def org_metadata
        # NOTE(review): the statement below has no subject; the organization
        # IRI appears to be missing from this file.
        <<-EOF.unindent
        a org:Organization, prov:Organization;
          skos:prefLabel "SciRuby";
          rdfs:description "A Project to Build and Improve Tools for Scientific Computing in Ruby".

        EOF
      end

      # Prints help for the metadata fields to standard output.
      #
      # @param topic [Object, nil] when given, only a placeholder is printed
      # @return [nil]
      def metadata_help(topic=nil)
        if topic
          puts "This should display help information for #{topic}, but there's none here yet :("
        else
          puts <<-EOF.unindent
          Available metadata fields:
          (Field)       (Ontology)    (Description)
          publishers    dct/foaf/org  The Organization/s responsible for publishing the dataset
          subject       dct           The subject of this dataset. Use resources when possible
          var           dct           The name of the dataset resource (used internally)
          creator       dct           The person or process responsible for creating the dataset
          description   dct/rdfs      A description of the dataset
          issued        dct           The date of issuance for the dataset
          EOF
        end
      end

      private

      # Turns newline-separated steps into space-separated quoted strings.
      def quote_steps(text)
        '"' + text.split("\n").join('" "') + '"'
      end

      # Quotes the steps of a process given inline or as a path to an existing
      # file, without modifying the caller's hash.
      def quoted_process_steps(process)
        text = File.exist?(process) ? File.read(process) : process
        quote_steps(text)
      end

      # Emits the agent statements for source[:author] and, when present,
      # source[:organization]; activity_name adds a wasAssociatedWith link.
      def agent_statements(source, activity_name = nil)
        author = source[:author]
        organization = source[:organization]

        out = "<#{author}> a prov:Agent, prov:Person .\n"
        out << "#{activity_name} prov:wasAssociatedWith <#{author}> .\n" if activity_name
        out << "<#{author}> foaf:givenName \"#{source[:author_name]}\" .\n" if source[:author_name]
        return out << "\n" unless organization

        out << "<#{author}> prov:actedOnBehalfOf <#{organization}> .\n\n"
        out << "<#{organization}> a prov:Agent, prov:Organization.\n"
        if source[:organization_name]
          out << "<#{organization}> foaf:name \"#{source[:organization_name]}\" .\n\n"
        else
          out << "\n"
        end
        out
      end
    end
  end
end