require 'spec_helper' require 'fileutils' # http://ctr.wikia.com/wiki/Chemistry_Toolkit_Rosetta_Wiki # these are written as if the user is starting the project from scratch in the # style of the wiki. We put the test files in the same folder as our specs so # that the code can be copied directly into the wiki # the code that should be copied is within the capture_stdout blocks module Kernel def wiki_code(&block) block.call end alias_method :wiki_code_capture_stdout, :capture_stdout end def unlink(file) File.delete(file) if File.exist?(file) end describe 'Chemistry Toolkit Rosetta Wiki' do def pngs_are_about_same_size(png1, png2, delta=150) (png1.size - png2.size < delta) end def pngs_have_same_header(png1, png2, check_length=18) png1[0,check_length] == png2[0,check_length] end before(:each) do @orig_dir = Dir.pwd @wiki_spec_dir = File.dirname(__FILE__) Dir.chdir(@wiki_spec_dir) @keydir = @wiki_spec_dir + "/key" end after(:each) do Dir.chdir(@orig_dir) end specify 'Heavy atom counts from an SD file' do #http://ctr.wikia.com/wiki/Heavy_atom_counts_from_an_SD_file output = wiki_code_capture_stdout do require 'rubabel' Rubabel.foreach("benzodiazepine.sdf.gz") {|mol| puts mol.ob.num_hvy_atoms } end output.should == IO.read( @keydir + "/" + "benzodiazepine_heavy_atom_counts.output.10.txt" ) end specify 'Ring counts in a SMILES file' do # http://ctr.wikia.com/wiki/Ring_counts_in_a_SMILES_file output = wiki_code_capture_stdout do require 'rubabel' Rubabel.foreach("benzodiazepine.sdf.gz") {|mol| puts mol.ob_sssr.size } end output.should == IO.read( @keydir + '/' + "benzodiazepine_ring_counts.output.10.txt" ) end specify 'Convert a SMILES string to canonical SMILES' do # http://ctr.wikia.com/wiki/Convert_a_SMILES_string_to_canonical_SMILES wiki_code do require 'rubabel' smiles = %w{CN2C(=O)N(C)C(=O)C1=C2N=CN1C CN1C=NC2=C1C(=O)N(C)C(=O)N2C} cans = smiles.map {|smile| Rubabel[smile] } fail unless cans.reduce(:==) end end specify 'Working with SD tag data' do # http://ctr.wikia.com/wiki/Working_with_SD_tag_data wiki_code do require 'rubabel' File.open("RULE5.sdf", 'w') do |out| Rubabel.foreach("benzodiazepine.sdf.gz") do |mol| mol.data["RULE5"] = if mol.data.key?('PUBCHEM_XLOGP3') num_true = [ mol.data["PUBCHEM_CACTVS_HBOND_DONOR"].to_i <= 5, mol.data["PUBCHEM_CACTVS_HBOND_ACCEPTOR"].to_i <= 10, mol.data["PUBCHEM_MOLECULAR_WEIGHT"].to_f <= 500, mol.data["PUBCHEM_XLOGP3"].to_f <= 5 ].count(true) if num_true >= 3 then "1" else "0" end else "no logP" end out.print mol.write(:sdf) end end # NOTE for wiki: This updates the #2 line of each molfile to mention "OpenBabel" and the current timestamp, as it ought to. end (lines_got, lines_exp) = ["RULE5.sdf", @keydir + "/rule5.10.sdf"].map do |file| lines = IO.readlines(file).reject {|line| line =~ /OpenBabel/ } end lines_got[0,10].should == lines_exp[0,10] unlink "RULE5.sdf" end specify 'Detect and report SMILES and SDF parsing errors' do # http://ctr.wikia.com/wiki/Detect_and_report_SMILES_and_SDF_parsing_errors wiki_code do require 'rubabel' File.open("log.txt", 'w') do |out| %w(Q C1C).each do |smile| Rubabel[smile] rescue out.puts "bad smiles #{smile}" end end # note: error catching can only be performed with the from_string # method at the moment. end IO.read("log.txt").should == "bad smiles Q\nbad smiles C1C\n" puts "^^^^^ (ignore the above warning, part of spec) ^^^^^" # TODO: implement error catching in file reading # TODO: muffle the warning that Open Babel spits out on unmatched ring # bonds. Tried to capture stdout and stderr to no avail. unlink("log.txt") end specify 'Report how many SD file records are within a certain molecular weight range' do # http://ctr.wikia.com/wiki/Report_how_many_SD_file_records_are_within_a_certain_molecular_weight_range output = wiki_code_capture_stdout do require 'rubabel' puts Rubabel.foreach("benzodiazepine.sdf.gz").count {|mol| (300..400) === mol.mol_wt } end output.should == "7\n" # checked with large file and it came out to 3916, which is correct end specify 'Convert SMILES file to SD file' do # http://ctr.wikia.com/wiki/Convert_SMILES_file_to_SD_file wiki_code do require 'rubabel' File.open("benzodiazepine.smi.sdf", 'w') do |out| Rubabel.foreach("benzodiazepine.smi.gz") do |mol| mol.make_3d! out.print mol.write_string(:sdf) end end # note: OpenBabel gets most stereochemistry correct. When it cannot # the particular atoms are noted in a message to stderr # note: A minimal amount of optimization is performed in this routine # (following pybel's make3D routine) so that the molecule doesn't make # chemists reel. end unlink "benzodiazepine.smi.sdf" end specify 'Report the similarity between two structures' do # http://ctr.wikia.com/wiki/Report_the_similarity_between_two_structures output = wiki_code_capture_stdout do require 'rubabel' (mol1, mol2) = %w{CC(C)C=CCCCCC(=O)NCc1ccc(c(c1)OC)O COC1=C(C=CC(=C1)C=O)O}.map{|smile| Rubabel[smile]} puts mol1.tanimoto(mol2) end output.should == "0.36046511627906974\n" # notes: reports similarity of 0.36046511627906974 end specify 'Find the 10 nearest neighbors in a data set' do # http://ctr.wikia.com/wiki/Find_the_10_nearest_neighbors_in_a_data_set output = wiki_code_capture_stdout do require 'rubabel' prev = nil comp = [] Rubabel.foreach("benzodiazepine.sdf.gz").map do |mol| prev = mol if prev.nil? comp << [prev.tanimoto(mol), mol.title] end puts comp.sort.reverse[1,11].map {|v| "#{v[0].round(3)} #{v[1]}" } end output.should == "0.979 3016\n0.979 2997\n0.909 3369\n0.874 2809\n0.769 3299\n0.74 2802\n0.379 3261\n0.377 2118\n0.368 1963\n" # output when run on entire benzodiazepine.sdf.gz: #1.0 450820 #1.0 1688 #0.993 20351792 #0.986 9862446 #0.979 398658 #0.979 398657 #0.979 6452650 #0.979 450830 #0.979 44353759 #0.979 3016 #0.979 2997 end # jtp original implementation =begin output = wiki_code_capture_stdout do require 'rubabel' fp1 = nil comparisons = Rubabel.foreach("benzodiazepine.sdf.gz").map do |mol| if fp1 [OpenBabel::OBFingerprint.tanimoto(fp1, mol.ob_fingerprint), mol.title] else (fp1 = mol.ob_fingerprint) && nil end end.compact puts comparisons.sort.reverse[0,10].map {|v| "#{v[0].round(3)} #{v[1]}" } end output.should == "0.979 3016\n0.979 2997\n0.909 3369\n0.874 2809\n0.769 3299\n0.74 2802\n0.379 3261\n0.377 2118\n0.368 1963\n" # notes: still need a first class fingerprint object so this cleaner # output when run on entire benzodiazepine.sdf.gz: # 1.0 623918 # 1.0 450820 # 0.993 20351792 # 0.986 9862446 # 0.979 398658 # 0.979 398657 # 0.979 6452650 # 0.979 450830 # 0.979 44353759 # 0.979 3016 # this iterator method gives a segfault!! (not sure why) #mol_iter = Rubabel.foreach("benzodiazepine.sdf.gz") #fp1 = mol_iter.next.ob_fingerprint #loop do # mol=mol_iter.next rescue break # comparisons << [OpenBabel::OBFingerprint.tanimoto(fp1, mol.ob_fingerprint).round(2), mol.title] #end #puts comparisons.sort.reverse.map {|pair| pair.join(" ") } end =end specify 'Depict a compound as an image' do # http://ctr.wikia.com/wiki/Depict_a_compound_as_an_image wiki_code do require 'rubabel' mol = Rubabel["CN1C=NC2=C1C(=O)N(C(=O)N2C)C"] mol.title = 'Caffeine' mol.write('caffeine.png') # NOTE: use svg and convert to png to change image size end png_out = IO.read(@wiki_spec_dir + "/caffeine.png") key_out = IO.read(@keydir + "/caffeine.frozen.png") pngs_are_about_same_size(png_out, key_out).should be_true pngs_have_same_header(png_out, key_out).should be_true File.unlink('caffeine.png') end #OR, using commandline #%x{obabel -:"CN1C=NC2=C1C(=O)N(C(=O)N2C)C" -O "mol.png" -xP 300} specify 'Highlight a substructure in the depiction' do # http://ctr.wikia.com/wiki/Highlight_a_substructure_in_the_depiction # when we fix the draw function, this should work like a charm! wiki_code do require 'rubabel' mol = Rubabel.foreach("benzodiazepine.sdf.gz").find {|mol| mol.title == "3016" } mol.highlight_substructure!("c1ccc2c(c1)C(=NCCN2)c3ccccc3").remove_h! mol.write("3016_highlighted.rubabel.png", u: true) # NOTE: use svg and convert to png to change image size end png_out = IO.read(@wiki_spec_dir + "/3016_highlighted.rubabel.png") key_out = IO.read(@keydir + "/3016_highlighted.rubabel.frozen.png") pngs_are_about_same_size(png_out, key_out).should be_true pngs_have_same_header(png_out, key_out).should be_true File.unlink('3016_highlighted.rubabel.png') end specify 'Align the depiction using a fixed substructure' do # http://ctr.wikia.com/wiki/Align_the_depiction_using_a_fixed_substructure #TODO end specify 'Unique SMARTS matches against a SMILES string' do # http://ctr.wikia.com/wiki/Unique_SMARTS_matches_against_a_SMILES_string output = wiki_code_capture_stdout do require 'rubabel' mol = Rubabel["C1CC12C3(C24CC4)CC3"] [false, true].map {|uniq| puts mol.matches("*1**1", uniq).size } end output.should == "24\n4\n" end specify 'Calculate TPSA' do output = wiki_code_capture_stdout do require 'rubabel' lines = IO.readlines("tpsa.tab") header = lines.shift @patterns = lines.map {|line| line.chomp.split("\t") } def TPSA(mol) @patterns.inject(0.0) {|s,p| s + p[0].to_f * mol.matches(p[1], false).size } end puts TPSA( Rubabel["CN2C(=O)N(C)C(=O)C1=C2N=CN1C"] ) end output.should == "61.82\n" end specify 'Find the graph diameter' do output = wiki_code_capture_stdout do require 'rubabel' puts Rubabel["CC(C)C(C(=O)NC(=O)C1CCCN1C(=O)C(C)NC(=O)C(C)NC(=O)CCC(=O)OC)NC2=CC=C(C=C2)[N+](=O)[O-]"].graph_diameter end output.should == "24\n" end specify 'Break rotatable bonds and report the fragments' do output = wiki_code_capture_stdout do smarts = "[!$([NH]!@C(=O))&!D1&!$(*#*)]-&!@[!$([NH]!@C(=O))&!D1&!$(*#*)]" mol = Rubabel["c1ccc2c(c1)C(=NC(C(=O)N2CC(=O)O)Cc3ccc(cc3)O)c4cccc(c4)O"] mol.matches(smarts).each do |atom1, atom2| mol.delete(atom1.get_bond(atom2)) [atom1, atom2].each do |old_a| mol.add_atom!(0, 1, old_a) end end puts "#{mol.to_s.gsub('.',"\n")}" end output.should == "*C*\n*C*\n*C(=O)O\nO=C1C(*)N=C(c2c(N1*)cccc2)*\n*c1cccc(c1)O\n*c1ccc(cc1)O\n" end xit 'Perform a substructure search on SDF file and report the number of false positives' do output = wiki_code_capture_stdout do require 'rubabel' smart = Rubabel::Smarts.new("C1C=C(NC=O)C=CC=1") sm_mol = Rubabel[smart.to_s] count = 0 Rubabel.foreach("benzodiazepine.sdf.gz").map do |mol| count += 1 if sm_mol.to_s == mol.ob_fingerprint.to_s end #aromatize should be automatic for converstion from smarts?? #fingerprint search #bitTest/full substructure match puts count # puts "#{mols.size} total\n#{num_true_matches} matches\n#{num_matches - num_true_matches}" end # output.should == "12386 total\n8836 matches\n1 false positives\n" output.should == "" end xit 'Change stereochemistry of certain atoms in SMILES file' do #TODO end end =begin p OpenBabel::OBOp.methods - Object.methods pgen = OpenBabel::OBOp.find_type("gen3D") p pgen.methods - Object.new.methods pgen.do(mol.ob) out.print mol.write_string(:sdf) =end