lib/dataset.rb in opentox-ruby-1.0.2 vs lib/dataset.rb in opentox-ruby-2.0.0
- old
+ new
@@ -72,11 +72,11 @@
# Get all datasets from a service
# @param [optional,String] uri URI of the dataset service, defaults to service specified in configuration
# @return [Array] Array of dataset objects without data (use one of the load_* methods to pull data from the server)
def self.all(uri=CONFIG[:services]["opentox-dataset"], subjectid=nil)
- RestClientWrapper.get(uri,{:accept => "text/uri-list",:subjectid => subjectid}).to_s.each_line.collect{|u| Dataset.new(u, subjectid)}
+ RestClientWrapper.get(uri,{:accept => "text/uri-list",:subjectid => subjectid}).to_s.each_line.collect{|u| Dataset.new(u.chomp, subjectid)}
end
# Load YAML representation into the dataset
# @param [String] yaml YAML representation of the dataset
# @return [OpenTox::Dataset] Dataset object with YAML data
@@ -156,33 +156,46 @@
end
# Load and return only features from the dataset service
# @return [Hash] Features of the dataset
def load_features(subjectid=nil)
- parser = Parser::Owl::Dataset.new(@uri, subjectid)
- @features = parser.load_features(subjectid)
+ if (CONFIG[:yaml_hosts].include?(URI.parse(@uri).host))
+ @features = YAML.load(RestClientWrapper.get(File.join(@uri,"features"), {:accept => "application/x-yaml", :subjectid => subjectid}))
+ else
+ parser = Parser::Owl::Dataset.new(@uri, subjectid)
+ @features = parser.load_features(subjectid)
+ end
@features
end
+ def feature_classes(feature, subjectid=nil)
+ if Feature.find(feature, subjectid).feature_type == "classification"
+ classes = []
+ @data_entries.each do |c,e|
+ e[feature].each { |v| classes << v.to_s }
+ end
+ classes.uniq.sort
+ else
+ nil
+ end
+ end
+
+=begin
# Detect feature type(s) in the dataset
# @return [String] "classification", "regression", "mixed" or "unknown"
def feature_type(subjectid=nil)
load_features(subjectid)
- feature_types = @features.collect{|f,metadata| metadata[OT.isA]}.uniq
- if feature_types.size > 1
- "mixed"
+ feature_types = @features.collect{|f,metadata| metadata[RDF.type]}.flatten.uniq
+ if feature_types.include?(OT.NominalFeature)
+ "classification"
+ elsif feature_types.include?(OT.NumericFeature)
+ "regression"
else
- case feature_types.first
- when /NominalFeature/
- "classification"
- when /NumericFeature/
- "regression"
- else
- "unknown"
- end
+ "unknown"
end
end
+=end
# Get Spreadsheet representation
# @return [Spreadsheet::Workbook] Workbook which can be written with the spreadsheet gem (data_entries only, metadata will be discarded)
def to_spreadsheet
Serializer::Spreadsheets.new(self).to_spreadsheet
@@ -282,10 +295,10 @@
if features.size==0
compounds.each{ |c| dataset.add_compound(c) }
else
compounds.each do |c|
features.each do |f|
- unless @data_entries[c][f]
+ if @data_entries[c]==nil or @data_entries[c][f]==nil
dataset.add(c,f,nil)
else
@data_entries[c][f].each do |v|
dataset.add(c,f,v)
end