Sha256: 69d0688bed96fbabe2b8bd9382f0dac31ca9524da8fdc2f9e3f6aafbb39b5cd6

Contents?: true

Size: 1.6 KB

Versions: 4

Compression:

Stored size: 1.6 KB

Contents

require 'strscan'
require_relative '../support'
require_relative '../parsers/parser'

module Bioinform
  # Parses a numeric matrix, optionally preceded by a motif name, out of a
  # String by walking it with a StringScanner.
  #
  # Layout recognised by the patterns below:
  #   * an optional header line: optional '>', then a name without whitespace;
  #   * an optional "A C G T" column-header line (case-insensitive);
  #   * one or more rows of numbers separated by single spaces, each row
  #     optionally prefixed with a nucleotide letter and a ':' or '|'.
  #
  # When the upcoming rows are labelled A, C, G, T (one line per nucleotide)
  # the collected matrix is transposed before it is passed to Parser.parse!.
  #
  # NOTE(review): String#multiline_squish and StringScanner#advanced_scan are
  # not part of the standard library; presumably they come from '../support'.
  # The row pattern expects exactly one space between numbers, so it presumably
  # relies on multiline_squish collapsing runs of whitespace - confirm.
  class StringParser < Parser
    include MultipleMotifsParser

    # scanner          - the StringScanner over the (squished) input.
    # row_acgt_markers - true when the most recently parsed matrix had its
    #                    rows labelled A/C/G/T, nil otherwise.
    attr_reader :scanner, :row_acgt_markers

    # @param input [String] text to parse
    # @raise [ArgumentError] if input is not a String
    def initialize(input)
      raise ArgumentError, 'StringParser should be initialized with a String' unless input.is_a?(String)

      super
      @scanner = StringScanner.new(input.multiline_squish)
    end

    # Pattern for one number: optional sign, integer part, optional fractional
    # part and an optional exponent of one to three digits.
    def number_pat
      /[+-]?\d+(\.\d+)?([eE][+-]?\d{1,3})?/
    end

    # Pattern for the header line; the motif name is captured as :name.
    def header_pat
      />?\s*(?<name>\S+)\n/
    end

    # Pattern for one matrix row; the numeric part (without the optional
    # nucleotide label) is captured as :row.
    def row_pat
      /([ACGT]\s*[:|]?\s*)?(?<row>(#{number_pat} )*#{number_pat})\n?/
    end

    # Consumes one row at the scan pointer.
    # Returns the text of the :row capture, or a falsy value when
    # advanced_scan reports no match.
    def scan_row
      match = scanner.advanced_scan(row_pat)
      match && match[:row]
    end

    # Splits a row string on whitespace and converts every token with to_f.
    def split_row(row_string)
      row_string.split.map(&:to_f)
    end

    # Skips any whitespace at the scan pointer.
    def scan_any_spaces
      scanner.scan(/\s+/)
    end

    # Consumes the header line, if present.
    # Returns the :name capture, or a falsy value when there is no header.
    def parse_name
      match = scanner.advanced_scan(header_pat)
      match && match[:name]
    end

    # Consumes consecutive rows and returns them as an Array of Arrays of
    # Floats (empty when no row matches).
    #
    # The row-label flag is recomputed on every call. Previously it was only
    # ever set to true, so once one matrix with A/C/G/T row labels had been
    # parsed, every later matrix parsed by the same instance was transposed
    # as well, whether or not it carried the labels.
    def parse_matrix
      @row_acgt_markers = scanner.check(/A.*\nC.*\nG.*\nT.*\n?/) ? true : nil

      matrix = []
      while (row_string = scan_row)
        matrix << split_row(row_string)
      end
      matrix
    end

    # Consumes an "A C G T" column-header line, if present (case-insensitive).
    # Returns the matched text or nil.
    def parse_acgt_header
      scanner.scan(/A\s*C\s*G\s*T\s*\n/i)
    end

    # Parses one motif starting at the current scan position: leading
    # whitespace, optional name, optional column header, then the rows.
    # Returns whatever Parser.parse! returns for the matrix, after assigning
    # the parsed name (possibly nil) to it through `name=`.
    def parse!
      scan_any_spaces
      name = parse_name
      parse_acgt_header
      matrix = parse_matrix
      # Rows labelled A/C/G/T hold one nucleotide per row, so flip the matrix.
      matrix = matrix.transpose if row_acgt_markers
      Parser.parse!(matrix).tap { |result| result.name = name }
    end

    # Moves the scan pointer back to the beginning of the input.
    def scanner_reset
      scanner.reset
    end
  end
end

Version data entries

4 entries across 4 versions and 1 rubygem

Version Path
bioinform-0.1.12 lib/bioinform/parsers/string_parser.rb
bioinform-0.1.11 lib/bioinform/parsers/string_parser.rb
bioinform-0.1.10 lib/bioinform/parsers/string_parser.rb
bioinform-0.1.9 lib/bioinform/parsers/string_parser.rb