lib/dataset.rb in opentox-ruby-0.0.2 vs lib/dataset.rb in opentox-ruby-1.0.0

- old
+ new

@@ -12,11 +12,11 @@ # dataset = OpenTox::Dataset.new # @example Create an empty dataset with URI # dataset = OpenTox::Dataset.new("http:://webservices.in-silico/ch/dataset/1") # @param [optional, String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object - def initialize(uri=nil) + def initialize(uri=nil,subjectid=nil) super uri @features = {} @compounds = [] @data_entries = {} end @@ -24,75 +24,94 @@ # Create an empty dataset and save it at the dataset service (assigns URI to dataset) # @example Create new dataset and save it to obtain a URI # dataset = OpenTox::Dataset.create # @param [optional, String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object - def self.create(uri=CONFIG[:services]["opentox-dataset"]) - dataset = Dataset.new - dataset.save + def self.create(uri=CONFIG[:services]["opentox-dataset"], subjectid=nil) + dataset = Dataset.new(nil,subjectid) + dataset.save(subjectid) dataset end # Create dataset from CSV file (format specification: http://toxcreate.org/help) # - loads data_entries, compounds, features # - sets metadata (warnings) for parser errors # - you will have to set remaining metadata manually # @param [String] file CSV file path # @return [OpenTox::Dataset] Dataset object with CSV data - def self.create_from_csv_file(file) - dataset = Dataset.create + def self.create_from_csv_file(file, subjectid=nil) + dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid) parser = Parser::Spreadsheets.new parser.dataset = dataset parser.load_csv(File.open(file).read) - dataset.save + dataset.save(subjectid) dataset end - + # Find a dataset and load all data. This can be time consuming, use Dataset.new together with one of the load_* methods for a fine grained control over data loading. # @param [String] uri Dataset URI # @return [OpenTox::Dataset] Dataset object with all data - def self.find(uri) - dataset = Dataset.new(uri) - dataset.load_all + def self.find(uri, subjectid=nil) + return nil unless uri + dataset = Dataset.new(uri, subjectid) + dataset.load_all(subjectid) dataset end + + # replaces find as exist check, takes not as long, does NOT raise an un-authorized exception + # @param [String] uri Dataset URI + # @return [Boolean] true if dataset exists and user has get rights, false else + def self.exist?(uri, subjectid=nil) + return false unless uri + dataset = Dataset.new(uri, subjectid) + begin + dataset.load_metadata( subjectid ).size > 0 + rescue + false + end + end # Get all datasets from a service # @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration # @return [Array] Array of dataset object without data (use one of the load_* methods to pull data from the server) - def self.all(uri=CONFIG[:services]["opentox-dataset"]) - RestClientWrapper.get(uri,:accept => "text/uri-list").to_s.each_line.collect{|u| Dataset.new(u)} + def self.all(uri=CONFIG[:services]["opentox-dataset"], subjectid=nil) + RestClientWrapper.get(uri,{:accept => "text/uri-list",:subjectid => subjectid}).to_s.each_line.collect{|u| Dataset.new(u, subjectid)} end # Load YAML representation into the dataset # @param [String] yaml YAML representation of the dataset # @return [OpenTox::Dataset] Dataset object with YAML data def load_yaml(yaml) copy YAML.load(yaml) end def load_rdfxml(rdfxml) - load_rdfxml_file Tempfile.open("ot-rdfxml"){|f| f.write(rdfxml)}.path + raise "rdfxml data is empty" if rdfxml.to_s.size==0 + file = Tempfile.new("ot-rdfxml") + file.puts rdfxml + file.close + load_rdfxml_file file + file.delete end # Load RDF/XML representation from a file # @param [String] file File with RDF/XML representation of the dataset # @return [OpenTox::Dataset] Dataset object with RDF/XML data - def load_rdfxml_file(file) - parser = Parser::Owl::Dataset.new @uri + def load_rdfxml_file(file, subjectid=nil) + parser = Parser::Owl::Dataset.new @uri, subjectid parser.uri = file.path - copy parser.load_uri + copy parser.load_uri(subjectid) end # Load CSV string (format specification: http://toxcreate.org/help) # - loads data_entries, compounds, features # - sets metadata (warnings) for parser errors # - you will have to set remaining metadata manually # @param [String] csv CSV representation of the dataset # @return [OpenTox::Dataset] Dataset object with CSV data - def load_csv(csv) - save unless @uri # get a uri for creating features + def load_csv(csv, subjectid=nil) + save(subjectid) unless @uri # get a uri for creating features parser = Parser::Spreadsheets.new parser.dataset = self parser.load_csv(csv) end @@ -100,49 +119,49 @@ # - loads data_entries, compounds, features # - sets metadata (warnings) for parser errors # - you will have to set remaining metadata manually # @param [Excel] book Excel workbook object (created with roo gem) # @return [OpenTox::Dataset] Dataset object with Excel data - def load_spreadsheet(book) - save unless @uri # get a uri for creating features + def load_spreadsheet(book, subjectid=nil) + save(subjectid) unless @uri # get a uri for creating features parser = Parser::Spreadsheets.new parser.dataset = self parser.load_spreadsheet(book) end # Load and return only metadata of a Dataset object # @return [Hash] Metadata of the dataset - def load_metadata - add_metadata Parser::Owl::Dataset.new(@uri).load_metadata + def load_metadata(subjectid=nil) + add_metadata Parser::Owl::Dataset.new(@uri, subjectid).load_metadata(subjectid) self.uri = @uri if @uri # keep uri @metadata end # Load all data (metadata, data_entries, compounds and features) from URI - def load_all + def load_all(subjectid=nil) if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) - copy YAML.load(RestClientWrapper.get(@uri, :accept => "application/x-yaml")) + copy YAML.load(RestClientWrapper.get(@uri, {:accept => "application/x-yaml", :subjectid => subjectid})) else - parser = Parser::Owl::Dataset.new(@uri) - copy parser.load_uri + parser = Parser::Owl::Dataset.new(@uri, subjectid) + copy parser.load_uri(subjectid) end end # Load and return only compound URIs from the dataset service # @return [Array] Compound URIs in the dataset - def load_compounds - RestClientWrapper.get(File.join(uri,"compounds"),:accept=> "text/uri-list").to_s.each_line do |compound_uri| + def load_compounds(subjectid=nil) + RestClientWrapper.get(File.join(uri,"compounds"),{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri| @compounds << compound_uri.chomp end @compounds.uniq! end # Load and return only features from the dataset service # @return [Hash] Features of the dataset - def load_features - parser = Parser::Owl::Dataset.new(@uri) - @features = parser.load_features + def load_features(subjectid=nil) + parser = Parser::Owl::Dataset.new(@uri, subjectid) + @features = parser.load_features(subjectid) @features end # Detect feature type(s) in the dataset # @return [String] `classification", "regression", "mixed" or unknown` @@ -216,11 +235,11 @@ def add (compound,feature,value) @compounds << compound unless @compounds.include? compound @features[feature] = {} unless @features[feature] @data_entries[compound] = {} unless @data_entries[compound] @data_entries[compound][feature] = [] unless @data_entries[compound][feature] - @data_entries[compound][feature] << value + @data_entries[compound][feature] << value if value!=nil end # Add/modify metadata, existing entries will be overwritten # @example # dataset.add_metadata({DC.title => "any_title", DC.creator => "my_email"}) @@ -240,38 +259,74 @@ # @param [String] feature Feature URI # @param [Hash] metadata Hash with feature metadata def add_feature_metadata(feature,metadata) metadata.each { |k,v| @features[feature][k] = v } end + + # Add a new compound + # @param [String] compound Compound URI + def add_compound (compound) + @compounds << compound unless @compounds.include? compound + end + + # Creates a new dataset, by splitting the current dataset, i.e. using only a subset of compounds and features + # @param [Array] compounds List of compound URIs + # @param [Array] features List of feature URIs + # @param [Hash] metadata Hash containing the metadata for the new dataset + # @param [String] subjectid + # @return [OpenTox::Dataset] newly created dataset, already saved + def split( compounds, features, metadata, subjectid=nil) + LOGGER.debug "split dataset using "+compounds.size.to_s+"/"+@compounds.size.to_s+" compounds" + raise "no new compounds selected" unless compounds and compounds.size>0 + dataset = OpenTox::Dataset.create(CONFIG[:services]["opentox-dataset"],subjectid) + if features.size==0 + compounds.each{ |c| dataset.add_compound(c) } + else + compounds.each do |c| + features.each do |f| + unless @data_entries[c][f] + dataset.add(c,f,nil) + else + @data_entries[c][f].each do |v| + dataset.add(c,f,v) + end + end + end + end + end + dataset.add_metadata(metadata) + dataset.save(subjectid) + dataset + end # Save dataset at the dataset service # - creates a new dataset if uri is not set # - overwrites dataset if uri exists # @return [String] Dataset URI - def save + def save(subjectid=nil) # TODO: rewrite feature URI's ?? @compounds.uniq! if @uri if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)) - RestClientWrapper.post(@uri,{:content_type => "application/x-yaml"},self.to_yaml) + RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid}) else File.open("ot-post-file.rdf","w+") { |f| f.write(self.to_rdfxml); @path = f.path } - task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list"}).to_s.chomp + task_uri = RestClient.post(@uri, {:file => File.new(@path)},{:accept => "text/uri-list" , :subjectid => subjectid}).to_s.chomp #task_uri = `curl -X POST -H "Accept:text/uri-list" -F "file=@#{@path};type=application/rdf+xml" http://apps.ideaconsult.net:8080/ambit2/dataset` Task.find(task_uri).wait_for_completion - self.uri = RestClientWrapper.get(task_uri,:accept => 'text/uri-list') + self.uri = RestClientWrapper.get(task_uri,{:accept => 'text/uri-list', :subjectid => subjectid}) end else # create dataset if uri is empty - self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{}).to_s.chomp + self.uri = RestClientWrapper.post(CONFIG[:services]["opentox-dataset"],{:subjectid => subjectid}).to_s.chomp end @uri end # Delete dataset at the dataset service - def delete - RestClientWrapper.delete @uri + def delete(subjectid=nil) + RestClientWrapper.delete(@uri, :subjectid => subjectid) end private # Copy a dataset (rewrites URI) def copy(dataset) @@ -291,12 +346,12 @@ class LazarPrediction < Dataset # Find a prediction dataset and load all data. # @param [String] uri Prediction dataset URI # @return [OpenTox::Dataset] Prediction dataset object with all data - def self.find(uri) - prediction = LazarPrediction.new(uri) - prediction.load_all + def self.find(uri, subjectid=nil) + prediction = LazarPrediction.new(uri, subjectid) + prediction.load_all(subjectid) prediction end def value(compound) @data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first