lib/import.rb in lazar-1.1.3 vs lib/import.rb in lazar-1.2.0

- old
+ new

@@ -1,125 +1,21 @@ module OpenTox - # Import data from external databases - module Import + class Import - class Enanomapper - include OpenTox - - # Import from eNanoMapper - def self.import - # time critical step: JSON parsing (>99%), Oj brings only minor speed gains (~1%) - datasets = {} - bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle', {}, {accept: :json}))["dataset"] - bundles.each do |bundle| - datasets[bundle["URI"]] = Dataset.find_or_create_by(:source => bundle["URI"],:name => bundle["title"].strip) - $logger.debug bundle["title"].strip - nanoparticles = JSON.parse(RestClientWrapper.get(bundle["dataset"], {}, {accept: :json}))["dataEntry"] - nanoparticles.each_with_index do |np,n| - core_id = nil - coating_ids = [] - np["composition"].each do |c| - uri = c["component"]["compound"]["URI"] - data = JSON.parse(RestClientWrapper.get("https://data.enanomapper.net/query/compound/url/all?search=#{uri}", {}, {accept: :json})) - source = data["dataEntry"][0]["compound"]["URI"] - smiles = data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23SMILESDefault"] - names = [] - names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23ChemicalNameDefault"] - names << data["dataEntry"][0]["values"]["https://data.enanomapper.net/feature/http%3A%2F%2Fwww.opentox.org%2Fapi%2F1.1%23IUPACNameDefault"] - if smiles - compound = Compound.find_or_create_by(:smiles => smiles) - compound.name = names.first - compound.names = names.compact - else - compound = Compound.find_or_create_by(:name => names.first,:names => names.compact) - end - compound.source = source - compound.save - if c["relation"] == "HAS_CORE" - core_id = compound.id.to_s - elsif c["relation"] == "HAS_COATING" - coating_ids << compound.id.to_s - end - end if np["composition"] - nanoparticle = Nanoparticle.find_or_create_by( - :name => np["values"]["https://data.enanomapper.net/identifier/name"], - :source => np["compound"]["URI"], - :core_id => core_id, - :coating_ids => coating_ids - ) - np["bundles"].keys.each do |bundle_uri| - nanoparticle.dataset_ids << datasets[bundle_uri].id - end - - studies = JSON.parse(RestClientWrapper.get(File.join(np["compound"]["URI"],"study"), {}, {accept: :json}))["study"] - studies.each do |study| - dataset = datasets[np["bundles"].keys.first] - proteomics_features = {} - category = study["protocol"]["topcategory"] - source = study["protocol"]["category"]["term"] - study["effects"].each do |effect| - - effect["result"]["textValue"] ? klass = NominalFeature : klass = NumericFeature - effect["conditions"].delete_if { |k, v| v.nil? } - - if study["protocol"]["category"]["title"].match(/Proteomics/) and effect["result"]["textValue"] and effect["result"]["textValue"].length > 50 # parse proteomics data - - JSON.parse(effect["result"]["textValue"]).each do |identifier, value| # time critical step - proteomics_features[identifier] ||= NumericFeature.find_or_create_by(:name => identifier, :category => "Proteomics", :unit => "Spectral counts", :source => source,:measured => true) - nanoparticle.parse_ambit_value proteomics_features[identifier], value, dataset - end - else - name = effect["endpoint"] - unit = effect["result"]["unit"] - warnings = [] - case name - when "Log2 transformed" # use a sensible name - name = "log2(Net cell association)" - warnings = ["Original name was 'Log2 transformed'"] - unit = "log2(mL/ug(Mg))" - when "Total protein (BCA assay)" - category = "P-CHEM" - warnings = ["Category changed from TOX to P-CHEM"] - end - feature = klass.find_or_create_by( - :name => name, - :unit => unit, - :category => category, - :conditions => effect["conditions"], - :source => study["protocol"]["category"]["term"], - :measured => true, - :warnings => warnings - ) - nanoparticle.parse_ambit_value feature, effect["result"], dataset - end - end - end - nanoparticle.save - print "#{n}, " - end - puts + # Import datasets from the data folder, create and validate models + # @return [Array<OpenTox::Model::Validation>] Validated models + def self.public_data + models = [] + Dir[File.join(File.dirname(__FILE__),"..","data/*csv")].each do |f| + $logger.debug f + m = Model::Validation.from_csv_file f + $logger.debug "#{f} ID: #{m.id.to_s}" + m.crossvalidations.each do |cv| + $logger.debug cv.statistics end - datasets.each { |u,d| d.save } + models << m end - -=begin - def self.import_ld # defunct, AMBIT JSON_LD does not have substance entries - #get list of bundle URIs - bundles = JSON.parse(RestClientWrapper.get('https://data.enanomapper.net/bundle?media=application%2Fjson'))["dataset"] - datasets = [] - bundles.each do |bundle| - uri = bundle["URI"] - study = JSON.parse(`curl -H 'Accept:application/ld+json' '#{uri}/substance'`) - study["@graph"].each do |i| - puts i.to_yaml if i.keys.include? "sio:has-value" - end - end - datasets.collect{|d| d.id} - end -=end - + models end - end - end