Sha256: d43a01fa50f1047d505c6bb8ae7302f3c3bd3b6ef72294e9845ca7d860c19cff
Contents?: true
Size: 1.51 KB
Versions: 2
Compression:
Stored size: 1.51 KB
Contents
# To change this template, choose Tools | Templates # and open the template in the editor. class BlastStringParser def initialize end #Set up Regexps #SPECIES_REGEXP2 = /^.*\[(\w* \w*).*\].*$/ #captures the first two words in square brackets SPECIES_REGEXP2 = /^.*\[(.*)\].*$/ #captures everything in square brackets SGI_REGEXP = /^gi\|(\d+)\|.*$/ #QUERY_SEQ_REGEXP = /^([a-zA-Z0-9]+)[_|\s].*$/ #This captures everything up to the 1st underscore QUERY_SEQ_REGEXP = /^(\S+)\s.*$/ #This captures everything until the first whitespace (more robust) #do not expect whitespace after the last | for robustness, strip later SUBJ_ANNOTATION_REGEXP = /(?:.*\|)*(.*)\[.*/ #TODO check if this REGEXP captures the right stuff def get_sgi_info(a_hit_id) unless SGI_REGEXP.match(a_hit_id) raise("Wrong hit id " + a_hit_id) else return SGI_REGEXP.match(a_hit_id)[1] end end def get_species_name(a_hit_def) unless SPECIES_REGEXP2.match(a_hit_def) raise "No species info found!" else return SPECIES_REGEXP2.match(a_hit_def)[1] end end def get_subject_annotation(a_hit_def) unless SUBJ_ANNOTATION_REGEXP.match(a_hit_def) puts "Can not parse subject annotation " + a_hit_def[0..20] + "...\n" return a_hit_def else return SUBJ_ANNOTATION_REGEXP.match(a_hit_def)[1].strip end end def get_query_seq(a_query) unless QUERY_SEQ_REGEXP.match(a_query) return a_query else return QUERY_SEQ_REGEXP.match(a_query)[1] end end end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
bio-phyta-0.9.1 | lib/blast_string_parser.rb |
bio-phyta-0.9.0 | lib/blast_string_parser.rb |