lib/dataset.rb in opentox-ruby-0.0.2 vs lib/dataset.rb in opentox-ruby-1.0.0
- old
+ new
@@ -12,11 +12,11 @@
# dataset = OpenTox::Dataset.new
# @example Create an empty dataset with URI
# dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1")
# @param [optional, String] uri Dataset URI
# @return [OpenTox::Dataset] Dataset object
- def initialize(uri=nil)
+ def initialize(uri=nil,subjectid=nil)
super uri
@features = {}
@compounds = []
@data_entries = {}
end
@@ -24,75 +24,94 @@
# Create an empty dataset and save it at the dataset service (assigns URI to dataset)
# @example Create new dataset and save it to obtain a URI
# dataset = OpenTox::Dataset.create
# @param [optional, String] uri Dataset URI
# @return [OpenTox::Dataset] Dataset object
- def self.create(uri=CONFIG[:services]["opentox-dataset"])
- dataset = Dataset.new
- dataset.save
+ def self.create(uri=CONFIG[:services]["opentox-dataset"], subjectid=nil)
+ dataset = Dataset.new(nil,subjectid)
+ dataset.save(subjectid)
dataset
end
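# Usage sketch for Dataset.create with the new subjectid argument (the token
# value below is a hypothetical placeholder for a real OpenTox A&A security token):
#   subjectid = "AQIC5wM2LY4Sfcw"                                            # hypothetical token
#   dataset = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
#   dataset.uri                                                              # => URI assigned by the dataset service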
# Create dataset from CSV file (format specification: http://toxcreate.org/help)
# - loads data_entries, compounds, features
# - sets metadata (warnings) for parser errors
# - you will have to set remaining metadata manually
# @param [String] file CSV file path
# @return [OpenTox::Dataset] Dataset object with CSV data
- def self.create_from_csv_file(file)
- dataset = Dataset.create
+ def self.create_from_csv_file(file, subjectid=nil)
+ dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
parser = Parser::Spreadsheets.new
parser.dataset = dataset
parser.load_csv(File.open(file).read)
- dataset.save
+ dataset.save(subjectid)
dataset
end
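# Usage sketch for Dataset.create_from_csv_file (the file name is illustrative;
# remaining metadata has to be set manually, as noted above):
#   dataset = OpenTox::Dataset.create_from_csv_file("hamster_carcinogenicity.csv", subjectid)
#   dataset.add_metadata({DC.title => "Hamster carcinogenicity"})
#   dataset.save(subjectid)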
-
+
# Find a dataset and load all data. This can be time consuming; use Dataset.new together with one of the load_* methods for fine-grained control over data loading.
# @param [String] uri Dataset URI
# @return [OpenTox::Dataset] Dataset object with all data
- def self.find(uri)
- dataset = Dataset.new(uri)
- dataset.load_all
+ def self.find(uri, subjectid=nil)
+ return nil unless uri
+ dataset = Dataset.new(uri, subjectid)
+ dataset.load_all(subjectid)
dataset
end
+
+ # Replaces find for existence checks: it is faster and does NOT raise an unauthorized exception
+ # @param [String] uri Dataset URI
+ # @return [Boolean] true if the dataset exists and the user has GET rights, false otherwise
+ def self.exist?(uri, subjectid=nil)
+ return false unless uri
+ dataset = Dataset.new(uri, subjectid)
+ begin
+ dataset.load_metadata( subjectid ).size > 0
+ rescue
+ false
+ end
+ end
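# Usage sketch combining exist? and find (the dataset URI is illustrative):
#   uri = File.join(CONFIG[:services]["opentox-dataset"], "1")
#   if OpenTox::Dataset.exist?(uri, subjectid)
#     dataset = OpenTox::Dataset.find(uri, subjectid)
#   end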
# Get all datasets from a service
# @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration
# @return [Array] Array of dataset objects without data (use one of the load_* methods to pull data from the server)
- def self.all(uri=CONFIG[:services]["opentox-dataset"])
- RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)}
+ def self.all(uri=CONFIG[:services]["opentox-dataset"], subjectid=nil)
+ RestClientWrapper.get(uri,{:accept => "text/uri-list",:subjectid => subjectid}).to_s.each_line.collect{|u| Dataset.new(u, subjectid)}
end
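# Usage sketch: list the URIs of all datasets visible with the given token:
#   OpenTox::Dataset.all(CONFIG[:services]["opentox-dataset"], subjectid).each do |dataset|
#     puts dataset.uri
#   end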
# Load YAML representation into the dataset
# @param [String] yaml YAML representation of the dataset
# @return [OpenTox::Dataset] Dataset object with YAML data
def load_yaml(yaml)
copy YAML.load(yaml)
end
def load_rdfxml(rdfxml)
- load_rdfxml_file Tempfile.open("ot-rdfxml"){|f| f.write(rdfxml)}.path
+ raise "rdfxml data is empty" if rdfxml.to_s.size==0
+ file = Tempfile.new("ot-rdfxml")
+ file.puts rdfxml
+ file.close
+ load_rdfxml_file file
+ file.delete
end
# Load RDF/XML representation from a file
# @param [File] file File with RDF/XML representation of the dataset
# @return [OpenTox::Dataset] Dataset object with RDF/XML data
- def load_rdfxml_file(file)
- parser = Parser::Owl::Dataset.new @uri
+ def load_rdfxml_file(file, subjectid=nil)
+ parser = Parser::Owl::Dataset.new @uri, subjectid
parser.uri = file.path
- copy parser.load_uri
+ copy parser.load_uri(subjectid)
end
# Load CSV string (format specification: http://toxcreate.org/help)
# - loads data_entries, compounds, features
# - sets metadata (warnings) for parser errors
# - you will have to set remaining metadata manually
# @param [String] csv CSV representation of the dataset
# @return [OpenTox::Dataset] Dataset object with CSV data
- def load_csv(csv)
- save unless @uri # get a uri for creating features
+ def load_csv(csv, subjectid=nil)
+ save(subjectid) unless @uri # get a uri for creating features
parser = Parser::Spreadsheets.new
parser.dataset = self
parser.load_csv(csv)
end
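# Usage sketch for load_csv on a dataset without a URI (save is triggered
# internally to obtain one; the file name is illustrative):
#   dataset = OpenTox::Dataset.new(nil, subjectid)
#   dataset.load_csv(File.read("training_data.csv"), subjectid)
#   dataset.save(subjectid)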
@@ -100,49 +119,49 @@
# - loads data_entries, compounds, features
# - sets metadata (warnings) for parser errors
# - you will have to set remaining metadata manually
# @param [Excel] book Excel workbook object (created with roo gem)
# @return [OpenTox::Dataset] Dataset object with Excel data
- def load_spreadsheet(book)
- save unless @uri # get a uri for creating features
+ def load_spreadsheet(book, subjectid=nil)
+ save(subjectid) unless @uri # get a uri for creating features
parser = Parser::Spreadsheets.new
parser.dataset = self
parser.load_spreadsheet(book)
end
# Load and return only metadata of a Dataset object
# @return [Hash] Metadata of the dataset
- def load_metadata
- add_metadata Parser::Owl::Dataset.new(@uri).load_metadata
+ def load_metadata(subjectid=nil)
+ add_metadata Parser::Owl::Dataset.new(@uri, subjectid).load_metadata(subjectid)
self.uri = @uri if @uri # keep uri
@metadata
end
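# Usage sketch for load_metadata, which avoids pulling data entries:
#   dataset = OpenTox::Dataset.new(uri, subjectid)
#   metadata = dataset.load_metadata(subjectid)
#   puts metadata[DC.title]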
# Load all data (metadata, data_entries, compounds and features) from URI
- def load_all
+ def load_all(subjectid=nil)
if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
- copy YAML.load(RestClientWrapper.get(@uri, :accept => "application/x-yaml"))
+ copy YAML.load(RestClientWrapper.get(@uri, {:accept => "application/x-yaml", :subjectid => subjectid}))
else
- parser = Parser::Owl::Dataset.new(@uri)
- copy parser.load_uri
+ parser = Parser::Owl::Dataset.new(@uri, subjectid)
+ copy parser.load_uri(subjectid)
end
end
# Load and return only compound URIs from the dataset service
# @return [Array] Compound URIs in the dataset
- def load_compounds
- RestClientWrapper.get(File.join(uri,"compounds"),:accept=> "text/uri-list").to_s.each_line do |compound_uri|
+ def load_compounds(subjectid=nil)
+ RestClientWrapper.get(File.join(uri,"compounds"),{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri|
@compounds << compound_uri.chomp
end
@compounds.uniq!
end
# Load and return only features from the dataset service
# @return [Hash] Features of the dataset
- def load_features
- parser = Parser::Owl::Dataset.new(@uri)
- @features = parser.load_features
+ def load_features(subjectid=nil)
+ parser = Parser::Owl::Dataset.new(@uri, subjectid)
+ @features = parser.load_features(subjectid)
@features
end
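# Usage sketch: fetch compounds and features without the (potentially large) data entries:
#   dataset = OpenTox::Dataset.new(uri, subjectid)
#   dataset.load_compounds(subjectid)             # fills the compound URI list
#   features = dataset.load_features(subjectid)   # => Hash of feature URIs and metadata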
# Detect feature type(s) in the dataset
# @return [String] "classification", "regression", "mixed" or "unknown"
@@ -216,11 +235,11 @@
def add (compound,feature,value)
@compounds << compound unless @compounds.include? compound
@features[feature] = {} unless @features[feature]
@data_entries[compound] = {} unless @data_entries[compound]
@data_entries[compound][feature] = [] unless @data_entries[compound][feature]
- @data_entries[compound][feature] << value
+ @data_entries[compound][feature] << value if value!=nil
end
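# Usage sketch for add: values accumulate per compound/feature pair, and nil
# values are now skipped (compound and feature URIs are hypothetical):
#   compound_uri = "http://webservices.opentox.org/compound/1"
#   feature_uri  = File.join(dataset.uri, "feature/hamster_carcinogenicity")
#   dataset.add(compound_uri, feature_uri, true)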
# Add/modify metadata, existing entries will be overwritten
# @example
# dataset.add_metadata({DC.title => "any_title", DC.creator => "my_email"})
@@ -240,38 +259,74 @@
# @param [String] feature Feature URI
# @param [Hash] metadata Hash with feature metadata
def add_feature_metadata(feature,metadata)
metadata.each { |k,v| @features[feature][k] = v }
end
+
+ # Add a new compound
+ # @param [String] compound Compound URI
+ def add_compound (compound)
+ @compounds << compound unless @compounds.include? compound
+ end
+
+ # Creates a new dataset by splitting the current dataset, i.e. using only a subset of compounds and features
+ # @param [Array] compounds List of compound URIs
+ # @param [Array] features List of feature URIs
+ # @param [Hash] metadata Hash containing the metadata for the new dataset
+ # @param [optional, String] subjectid Security token of the OpenTox A&A service
+ # @return [OpenTox::Dataset] newly created dataset, already saved
+ def split( compounds, features, metadata, subjectid=nil)
+ LOGGER.debug "split dataset using "+compounds.size.to_s+"/"+@compounds.size.to_s+" compounds"
+ raise "no new compounds selected" unless compounds and compounds.size>0
+ dataset = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"],subjectid)
+ if features.size==0
+ compounds.each{ |c| dataset.add_compound(c) }
+ else
+ compounds.each do |c|
+ features.each do |f|
+ unless @data_entries[c][f]
+ dataset.add(c,f,nil)
+ else
+ @data_entries[c][f].each do |v|
+ dataset.add(c,f,v)
+ end
+ end
+ end
+ end
+ end
+ dataset.add_metadata(metadata)
+ dataset.save(subjectid)
+ dataset
+ end
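# Usage sketch for split (assumes the class exposes compounds/features accessors;
# the 50/50 compound selection is illustrative):
#   training_compounds = dataset.compounds[0..(dataset.compounds.size/2 - 1)]
#   training = dataset.split(training_compounds, dataset.features.keys,
#     {DC.title => "training subset"}, subjectid)
#   training.uri   # split saves the new dataset before returning it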
# Save dataset at the dataset service
# - creates a new dataset if uri is not set
# - overwrites dataset if uri exists
# @return [String] Dataset URI
- def save
+ def save(subjectid=nil)
# TODO: rewrite feature URI's ??
@compounds.uniq!
if @uri
if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
- RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml)
+ RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid})
else
File.open("ot-post-file.rdf","w+") { |f| f.write(self.to_rdfxml); @path = f.path }
- task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list"}).to_s.chomp
+ task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list" , :subjectid => subjectid}).to_s.chomp
#task_uri = `curl -X POST -H "Accept:text/uri-list" -F "file=@#{@path};type=application/rdf+xml" http://apps.ideaconsult.net:8080/ambit2/dataset`
Task.find(task_uri).wait_for_completion
- self.uri = RestClientWrapper.get(task_uri,:accept => 'text/uri-list')
+ self.uri = RestClientWrapper.get(task_uri,{:accept => 'text/uri-list', :subjectid => subjectid})
end
else
# create dataset if uri is empty
- self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{}).to_s.chomp
+ self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{:subjectid => subjectid}).to_s.chomp
end
@uri
end
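# Usage sketch: save picks the transport automatically, a YAML POST for hosts
# listed in CONFIG[:yaml_hosts] and an RDF/XML file upload (via a task) otherwise
# (compound_uri/feature_uri as in the add example above):
#   dataset.add(compound_uri, feature_uri, 0.42)
#   dataset.save(subjectid)   # => dataset URI (unchanged if the dataset already existed)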
# Delete dataset at the dataset service
- def delete
- RestClientWrapper.delete @uri
+ def delete(subjectid=nil)
+ RestClientWrapper.delete(@uri, :subjectid => subjectid)
end
private
# Copy a dataset (rewrites URI)
def copy(dataset)
@@ -291,12 +346,12 @@
class LazarPrediction < Dataset
# Find a prediction dataset and load all data.
# @param [String] uri Prediction dataset URI
# @return [OpenTox::Dataset] Prediction dataset object with all data
- def self.find(uri)
- prediction = LazarPrediction.new(uri)
- prediction.load_all
+ def self.find(uri, subjectid=nil)
+ prediction = LazarPrediction.new(uri, subjectid)
+ prediction.load_all(subjectid)
prediction
end
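# Usage sketch for LazarPrediction.find (prediction URI and compound are illustrative):
#   prediction = OpenTox::LazarPrediction.find(prediction_uri, subjectid)
#   prediction.value(compound)   # => first value of the prediction feature for the compound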
def value(compound)
@data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first