#
# = bio/db/medline.rb - NCBI PubMed/MEDLINE database class
#
# Copyright::  Copyright (C) 2001, 2005
#              Toshiaki Katayama
# License::    The Ruby License
#
# $Id: medline.rb,v 1.17 2007/12/21 05:12:41 k Exp $
#

require 'bio/db'

module Bio

  # == Description
  #
  # NCBI PubMed/MEDLINE database class.
  #
  # The record text is split into fields keyed by their MEDLINE tag and the
  # accessors below return cleaned-up views of those fields.  A tag that is
  # absent from the record reads as an empty string (or an empty Array for
  # the list-valued accessors), never nil.
  #
  # == Examples
  #
  #   medline = Bio::MEDLINE.new(txt)
  #   medline.reference
  #   medline.pmid == medline.entry_id
  #   medline.mesh
  #
  class MEDLINE < NCBIDB

    # Parses one MEDLINE formatted record given as a String.
    #
    # A line starting with a word character opens a field: its first four
    # characters (stripped) are the tag and everything from the seventh
    # character onwards is the value.  Repeated tags are concatenated, each
    # occurrence keeping its own line terminator.  Any other line is taken as
    # a continuation of the current field; the terminator of the text
    # collected so far is replaced by a single space before appending.
    def initialize(entry)
      # The default '' is shared by all missing keys; it is never mutated
      # in place (only replaced via +=), so sharing it is safe.
      @pubmed = Hash.new('')

      tag = ''
      entry.each_line do |line|
        if line =~ /^\w/
          tag = line[0, 4].strip
        else
          # continuation from previous lines
          @pubmed[tag] = @pubmed[tag].sub(/(?:\r|\r\n|\n)\z/, ' ')
        end
        # Lines shorter than six characters have no value part (nil).
        value = line[6..-1]
        @pubmed[tag] += value if value
      end
    end

    # Hash of raw field text keyed by MEDLINE tag.
    attr_reader :pubmed

    # Returns a Reference object built from the parsed fields.  Fields whose
    # value is nil or empty are left out of the Hash passed to Reference.new.
    def reference
      hash = {
        'authors'      => authors,
        'title'        => title,
        'journal'      => journal,
        'volume'       => volume,
        'issue'        => issue,
        'pages'        => pages,
        'year'         => year,
        'pubmed'       => pmid,
        'medline'      => ui,
        'abstract'     => abstract,
        'mesh'         => mesh,
        'affiliations' => affiliations
      }
      hash.delete_if { |_key, value| value.nil? || value.empty? }
      Reference.new(hash)
    end


    ### Common MEDLINE tags

    # PMID - PubMed Unique Identifier
    #   Unique number assigned to each PubMed citation.
    def pmid
      @pubmed['PMID'].strip
    end
    alias entry_id pmid

    # UI   - MEDLINE Unique Identifier
    #   Unique number assigned to each MEDLINE citation.
    def ui
      @pubmed['UI'].strip
    end

    # TA   - Journal Title Abbreviation
    #   Standard journal title abbreviation.
    def ta
      @pubmed['TA'].gsub(/\s+/, ' ').strip
    end
    alias journal ta

    # VI   - Volume
    #   Journal volume.
    def vi
      @pubmed['VI'].strip
    end
    alias volume vi

    # IP   - Issue
    #   The number of the issue, part, or supplement of the journal in which
    #   the article was published.
    def ip
      @pubmed['IP'].strip
    end
    alias issue ip

    # PG   - Page Number
    #   The full pagination of the article.
    def pg
      @pubmed['PG'].strip
    end

    # Returns the pagination with an abbreviated end page expanded, e.g.
    # "1234-40" becomes "1234-1240".  Values without a hyphen are returned
    # unchanged.  Only the first two hyphen-separated parts are used.
    def pages
      pages = pg
      if pages =~ /-/
        from, to = pages.split('-')
        # An open-ended or malformed range ("123-", "-") leaves one side
        # missing after the split; hand back the raw value rather than
        # calling #length on nil.
        return pages if from.nil? || to.nil?
        if (len = from.length - to.length) > 0
          # Borrow the leading digits that the short form omitted.
          to = from[0, len] + to
        end
        pages = "#{from}-#{to}"
      end
      pages
    end

    # DP   - Publication Date
    #   The date the article was published.
    def dp
      @pubmed['DP'].strip
    end
    alias date dp

    # Returns the first four characters of the publication date (DP).
    def year
      dp[0, 4]
    end

    # TI   - Title Words
    #   The title of the article.
    def ti
      @pubmed['TI'].gsub(/\s+/, ' ').strip
    end
    alias title ti

    # AB   - Abstract
    #   Abstract.
    def ab
      @pubmed['AB'].gsub(/\s+/, ' ').strip
    end
    alias abstract ab

    # AU   - Author Name
    #   Authors' names.
    def au
      @pubmed['AU'].strip
    end

    # Returns the AU entries as an Array of Strings, one per author,
    # reformatted from "Surname FM" to "Surname, F. M." (and from
    # "Surname FM Jr" to "Surname, F. M. Jr").  Entries without a space are
    # passed through untouched.
    def authors
      # Split on LF or CRLF so that no stray "\r" stays attached to a name.
      au.split(/\r?\n/).map do |author|
        if author =~ / /
          name = author.split(/\s+/)
          suffix = nil
          if name.length > 2 && name[-2] =~ /^[A-Z]+$/
            # second to last are the initials, so the last word is a suffix
            suffix = name.pop
          end
          initial = name.pop.split(//).join('. ')
          author = "#{name.join(' ')}, #{initial}."
          author << " " + suffix if suffix
        end
        author
      end
    end

    # SO   - Source
    #   Composite field containing bibliographic information.
    def so
      @pubmed['SO'].strip
    end
    alias source so

    # MH   - MeSH Terms
    #   NLM's controlled vocabulary.
    #
    # Returns an Array with one element per MH line.
    def mh
      @pubmed['MH'].strip.split(/\r?\n/)
    end
    alias mesh mh

    # AD   - Affiliation
    #   Institutional affiliation and address of the first author, and grant
    #   numbers.
    #
    # Returns an Array with one element per AD field.
    def ad
      @pubmed['AD'].strip.split(/\r?\n/)
    end
    alias affiliations ad

    # AID  - Article Identifier
    #   Article ID values may include the pii (controlled publisher identifier)
    #   or doi (Digital Object Identifier).
    #
    # Returns the token preceding " [doi]" in the AID field, or nil.
    def doi
      @pubmed['AID'][/(\S+) \[doi\]/, 1]
    end

    # Returns the token preceding " [pii]" in the AID field, or nil.
    def pii
      @pubmed['AID'][/(\S+) \[pii\]/, 1]
    end


    ### Other MEDLINE tags

    # CI   - Copyright Information
    #   Copyright statement.

    # CIN  - Comment In
    #   Reference containing a comment about the article.

    # CN   - Collective Name
    #   Corporate author or group names with authorship responsibility.

    # CON  - Comment On
    #   Reference upon which the article comments.

    # CY   - Country
    #   The place of publication of the journal.

    # DA   - Date Created
    #   Used for internal processing at NLM.

    # DCOM - Date Completed
    #   Used for internal processing at NLM.

    # DEP  - Date of Electronic Publication
    #   Electronic publication date.

    # EDAT - Entrez Date
    #   The date the citation was added to PubMed.

    # EIN  - Erratum In
    #   Reference containing a published erratum to the article.

    # GS   - Gene Symbol
    #   Abbreviated gene names (used 1991 through 1996).

    # ID   - Identification Number
    #   Research grant numbers, contract numbers, or both that designate
    #   financial support by any agency of the US PHS (Public Health Service).

    # IS   - ISSN
    #   International Standard Serial Number of the journal.

    # JC   - Journal Title Code
    #   MEDLINE unique three-character code for the journal.

    # JID  - NLM Unique ID
    #   Unique journal ID in NLM's catalog of books, journals, and audiovisuals.

    # LA   - Language
    #   The language in which the article was published.

    # LR   - Last Revision Date
    #   The date a change was made to the record during a maintenance procedure.

    # MHDA - MeSH Date
    #   The date MeSH terms were added to the citation. The MeSH date is the
    #   same as the Entrez date until MeSH are added.

    # PHST - Publication History Status Date
    #   History status date.

    # PS   - Personal Name as Subject
    #   Individual is the subject of the article.

    # PST  - Publication Status
    #   Publication status.

    # PT   - Publication Type
    #   The type of material the article represents.
    #
    # Returns an Array with one element per PT line.
    def pt
      @pubmed['PT'].strip.split(/\r?\n/)
    end
    alias publication_type pt

    # RF   - Number of References
    #   Number of bibliographic references for Review articles.

    # RIN  - Retraction In
    #   Retraction of the article

    # RN   - EC/RN Number
    #   Number assigned by the Enzyme Commission to designate a particular
    #   enzyme or by the Chemical Abstracts Service for Registry Numbers.

    # ROF  - Retraction Of
    #   Article being retracted.

    # RPF  - Republished From
    #   Original article.

    # SB   - Journal Subset
    #   Code for a specific set of journals.

    # SI   - Secondary Source Identifier
    #   Identifies a secondary source that supplies information, e.g., other
    #   data sources, databanks and accession numbers of molecular sequences
    #   discussed in articles.

    # TT   - Transliterated / Vernacular Title
    #   Non-Roman alphabet language titles are transliterated.

    # UIN  - Update In
    #   Update to the article.

    # UOF  - Update Of
    #   The article being updated.

    # URLF - URL Full-Text
    #   Link to the full-text of article at provider's website. Links are
    #   incomplete. Use PmLink for the complete set of available links.
    #   [PmLink] http://www.ncbi.nlm.nih.gov/entrez/utils/pmlink_help.html

    # URLS - URL Summary
    #   Link to the article summary at provider's website. Links are
    #   incomplete. Use PmLink for the complete set of available links.
    #   [PmLink] http://www.ncbi.nlm.nih.gov/entrez/utils/pmlink_help.html

  end # MEDLINE

end # Bio