lib/dataset.rb in opentox-ruby-1.0.2 vs lib/dataset.rb in opentox-ruby-2.0.0

- old
+ new

@@ -72,11 +72,11 @@
 
     # Get all datasets from a service
     # @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration
     # @return [Array] Array of dataset object without data (use one of the load_* methods to pull data from the server)
     def self.all(uri=CONFIG[:services]["opentox-dataset"], subjectid=nil)
-      RestClientWrapper.get(uri,{:accept => "text/uri-list",:subjectid => subjectid}).to_s.each_line.collect{|u| Dataset.new(u, subjectid)}
+      RestClientWrapper.get(uri,{:accept => "text/uri-list",:subjectid => subjectid}).to_s.each_line.collect{|u| Dataset.new(u.chomp, subjectid)}
     end
 
     # Load YAML representation into the dataset
     # @param [String] yaml YAML representation of the dataset
     # @return [OpenTox::Dataset] Dataset object with YAML data
@@ -156,33 +156,46 @@
     end
 
     # Load and return only features from the dataset service
     # @return [Hash] Features of the dataset
     def load_features(subjectid=nil)
-      parser = Parser::Owl::Dataset.new(@uri, subjectid)
-      @features = parser.load_features(subjectid)
+      if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
+        @features = YAML.load(RestClientWrapper.get(File.join(@uri,"features"), {:accept => "application/x-yaml", :subjectid => subjectid}))
+      else
+        parser = Parser::Owl::Dataset.new(@uri, subjectid)
+        @features = parser.load_features(subjectid)
+      end
       @features
     end
 
+    def feature_classes(feature, subjectid=nil)
+      if Feature.find(feature, subjectid).feature_type == "classification"
+        classes = []
+        @data_entries.each do |c,e|
+          e[feature].each { |v| classes << v.to_s }
+        end
+        classes.uniq.sort
+      else
+        nil
+      end
+    end
+
+=begin
     # Detect feature type(s) in the dataset
     # @return [String] `classification", "regression", "mixed" or unknown`
     def feature_type(subjectid=nil)
       load_features(subjectid)
-      feature_types = @features.collect{|f,metadata| metadata[OT.isA]}.uniq
-      if feature_types.size > 1
-        "mixed"
+      feature_types = @features.collect{|f,metadata| metadata[RDF.type]}.flatten.uniq
+      if feature_types.include?(OT.NominalFeature)
+        "classification"
+      elsif feature_types.include?(OT.NumericFeature)
+        "regression"
       else
-        case feature_types.first
-        when /NominalFeature/
-          "classification"
-        when /NumericFeature/
-          "regression"
-        else
-          "unknown"
-        end
+        "unknown"
       end
     end
+=end
 
     # Get Spreadsheet representation
     # @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will will be discarded))
     def to_spreadsheet
       Serializer::Spreadsheets.new(self).to_spreadsheet
@@ -282,10 +295,10 @@
      if features.size==0
        compounds.each{ |c| dataset.add_compound(c) }
      else
        compounds.each do |c|
          features.each do |f|
-            unless @data_entries[c][f]
+            if @data_entries[c]==nil or @data_entries[c][f]==nil
              dataset.add(c,f,nil)
            else
              @data_entries[c][f].each do |v|
                dataset.add(c,f,v)
              end