Sha256: 74fe332db8574851c57b092ceb34dc9da774cd0ab7865c8093639d2e0b409747

Contents?: true

Size: 1.88 KB

Versions: 4

Compression:

Stored size: 1.88 KB

Contents

require 'rbbt/util/open'
require 'rbbt/util/misc'

# Reads the next non-empty record from a JoChem dictionary stream.
#
# Records are separated by lines that start with "--". Lines are accumulated
# until such a separator (or the end of the stream) is reached.
#
# Empty records (a separator at the very start of the stream, or two
# separators in a row) are skipped instead of being reported as nil, so a
# caller looping with `while chunk = read_chunk(io)` is not stopped before
# the stream is actually exhausted.
#
# jochem - an IO-like object responding to `eof?` and `gets`.
#
# Returns the record text (including line terminators, excluding the
# separator line), or nil once the stream has no more records.
def read_chunk(jochem)
  until jochem.eof?
    chunk = "".dup
    until jochem.eof?
      line = jochem.gets
      break if line =~ /^--/
      chunk << line
    end
    # An empty chunk only means we hit a bare separator; keep scanning.
    return chunk unless chunk.empty?
  end
  nil
end

# Nil-safe accessor for the first element of a list.
#
# list - a collection responding to `empty?` and `first`, or nil.
#
# Returns nil when the list is nil or empty, otherwise its first element.
def first(list)
  return nil if list.nil?
  return nil if list.empty?

  list.first
end

# Splits one JoChem record into a hash mapping each field code (e.g. "ID",
# "NA", "TM", "DB", "DF") to the list of values found for it, in order.
#
# A trailing "\t@match..." annotation is stripped from every line. Lines
# that do not have the "CODE value" shape are ignored rather than raising.
def parse_jochem_record(chunk)
  info = {}
  chunk.split(/\n/).each do |line|
    line = line.sub(/\t@match.*/, '')
    match = line.match(/([A-Z]*) (.*)/)
    next if match.nil?

    code, value = match.values_at(1, 2)
    (info[code] ||= []) << value
  end
  info
end

# Reads the JoChem dictionary and writes four tab-separated files into the
# current working directory:
#
#   identifiers - ID, compound name, PubChem ids, DrugBank ids
#   lexicon     - ID, name plus synonyms (joined with "|")
#   inchi       - ID, InChI strings (only for records that have one)
#   definitions - ID, definition (only for records that have one)
#
# Records that are empty, contain a line starting with "#", or contain a
# line starting with "NS " are skipped.
#
# The output files are always closed (and therefore flushed) before the
# method returns, even if processing raises part-way through.
def process_jochem
  outputs = []
  jochem = Open.open("http://www.biosemantics.org/uploads/file/Jochem/JochemV1_2.zip")

  outputs << (identifiers = File.open('identifiers', 'w'))
  identifiers.puts("#: :namespace=JoChem")
  identifiers.puts("#ID\tCompound Name\tPubChem:ID\tDrugBank:ID")

  outputs << (lexicon = File.open('lexicon', 'w'))
  lexicon.puts("#: :namespace=JoChem")
  lexicon.puts("#ID\tSynonyms")

  outputs << (inchi = File.open('inchi', 'w'))
  inchi.puts("#: :namespace=JoChem")
  inchi.puts("#ID\tInChi")

  outputs << (definitions = File.open('definitions', 'w'))
  definitions.puts("#: :namespace=JoChem#:type=:list")
  definitions.puts("#ID\tDefinition")

  while (chunk = read_chunk(jochem))
    next if chunk.empty? || chunk =~ /^#/ || chunk =~ /^NS /

    info = parse_jochem_record(chunk)

    id = first(info["ID"])
    na = first(info["NA"])
    df = first(info["DF"])
    tm = info["TM"] || []
    db = info["DB"] || []

    # Cross-references are stored as DB entries with a source prefix.
    pubc = db.map { |code| code[/PUBC_(.*)/, 1] }.compact
    drug = db.map { |code| code[/DRUG_(.*)/, 1] }.compact
    inch = db.map { |code| code[/INCH_InChI=(.*)/, 1] }.compact

    # The preferred name goes first, followed by the remaining terms.
    lexicon.puts [id, ([na] + tm) * "|"] * "\t"
    identifiers.puts [id, na, pubc * "|", drug * "|"] * "\t"
    inchi.puts [id, inch * "|"] * "\t" if inch.any?
    definitions.puts [id, df] * "\t" unless df.nil?
  end
ensure
  # Close whatever was opened so buffered output reaches disk.
  outputs.each { |file| file.close unless file.closed? }
  # NOTE(review): assumes the stream returned by Open.open can simply be
  # closed once reading is done — confirm against rbbt's Open.
  if jochem.respond_to?(:close) && !(jochem.respond_to?(:closed?) && jochem.closed?)
    jochem.close
  end
end

# Rule for any target whose name matches one of the four JoChem output
# file names: runs process_jochem inside a Misc.in_dir block for the
# target's directory (presumably so the relative output paths land next to
# the target — confirm against rbbt's Misc.in_dir).
rule(/identifiers|lexicon|inchi|definitions/) do |task|
  target_dir = File.dirname(task.name)
  Misc.in_dir(target_dir) { process_jochem }
end

Version data entries

4 entries across 4 versions & 1 rubygem

Version Path
rbbt-sources-1.2.0 share/install/JoChem/Rakefile
rbbt-sources-1.1.0 share/install/JoChem/Rakefile
rbbt-sources-1.0.1 share/install/JoChem/Rakefile
rbbt-sources-1.0.0 share/install/JoChem/Rakefile