Sha256: 04e19d915b7b7aedd70ebb7d113b8e4d35f6a55b167192269b83d1aa2da6d211
Contents?: true
Size: 1.42 KB
Versions: 3
Compression:
Stored size: 1.42 KB
Contents
class BlastStringParser def initialize end #Set up Regexps #SPECIES_REGEXP2 = /^.*\[(\w* \w*).*\].*$/ #captures the first two words in square brackets SPECIES_REGEXP2 = /^.*\[(.*)\].*$/ #captures everything in square brackets SGI_REGEXP = /^gi\|(\d+)\|.*$/ #QUERY_SEQ_REGEXP = /^([a-zA-Z0-9]+)[_|\s].*$/ #This captures everything up to the 1st underscore QUERY_SEQ_REGEXP = /^(\S+)\s.*$/ #This captures everything until the first whitespace (more robust) #do not expect whitespace after the last | for robustness, strip later SUBJ_ANNOTATION_REGEXP = /(?:.*\|)*(.*)\[.*/ #TODO check if this REGEXP captures the right stuff def get_sgi_info(a_hit_id) unless SGI_REGEXP.match(a_hit_id) raise("Wrong hit id " + a_hit_id) else return SGI_REGEXP.match(a_hit_id)[1] end end def get_species_name(a_hit_def) unless SPECIES_REGEXP2.match(a_hit_def) raise "No species info found!" else return SPECIES_REGEXP2.match(a_hit_def)[1] end end def get_subject_annotation(a_hit_def) unless SUBJ_ANNOTATION_REGEXP.match(a_hit_def) puts "Can not parse subject annotation " + a_hit_def[0..20] + "...\n" return a_hit_def else return SUBJ_ANNOTATION_REGEXP.match(a_hit_def)[1].strip end end def get_query_seq(a_query) unless QUERY_SEQ_REGEXP.match(a_query) return a_query else return QUERY_SEQ_REGEXP.match(a_query)[1] end end end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
bio-phyta-0.9.4 | lib/blast_string_parser.rb |
bio-phyta-0.9.3 | lib/blast_string_parser.rb |
bio-phyta-0.9.2 | lib/blast_string_parser.rb |