require "strscan" module FBO class Parser TAG_PATTERN = /<\w+>/m PATTERN = /<([A-Z]+)>.*<\/\1>/m def initialize(file, tag_pattern: TAG_PATTERN) @file = file @tag_pattern = tag_pattern end def notices if @notices.nil? parse_notices(split_file_contents) end @notices end private def split_file_contents contents = @file.read contents = cleanup_contents(contents) scanner = StringScanner.new(contents) text_notices = [] while !scanner.eos? initial_tag = scanner.check_until(@tag_pattern) if initial_tag initial_tag.strip! else break end text_notice = scanner.scan_until(/#{ closing_tag_for(initial_tag) }\s*/) if text_notice text_notices << text_notice.strip else break end end text_notices end def parse_notices(text_notices) @notices = [] text_notices.each do |text| if FBO::Parser::PresolicitationHandler.is_presolicitation?(text) @notices << FBO::Parser::PresolicitationHandler.parse(text) elsif FBO::Parser::CombinedSolicitationHandler.is_combined_solicitation?(text) @notices << FBO::Parser::CombinedSolicitationHandler.parse(text) elsif FBO::Parser::SourcesSoughtHandler.is_sources_sought?(text) @notices << FBO::Parser::SourcesSoughtHandler.parse(text) elsif FBO::Parser::AmendmentHandler.is_amendment?(text) @notices << FBO::Parser::AmendmentHandler.parse(text) elsif FBO::Parser::ModificationHandler.is_modification?(text) @notices << FBO::Parser::ModificationHandler.parse(text) elsif FBO::Parser::AwardHandler.is_award?(text) @notices << FBO::Parser::AwardHandler.parse(text) else @notices << FBO::Parser::UnknownHandler.parse(text) end end end def cleanup_contents(contents) contents .encode('UTF-16le', :invalid => :replace, :replace => '') .encode('UTF-8') .gsub(/\r\n/, "\n") .gsub(/^M/, "") end def closing_tag_for(tag) tag.sub(/