#!/usr/bin/env ruby
# The Computer Language Shootout
# http://shootout.alioth.debian.org/
#
# contributed by jose fco. gonzalez
# Adapted for the Ruby Benchmark Suite.
#
# Compares three regex engines (Ruby's built-in Regexp, Oniguruma's ORegexp
# and Boost::Regexp) on two workloads:
#   1. scanning a DNA sequence for nine case-insensitive patterns, and
#   2. searching generated lorem-ipsum text for a phone-number pattern.

require 'benchmark'
require 'rubygems'
require 'ruby-boost-regex'
require 'oniguruma'
require 'lorem'

include Oniguruma

# Load the FASTA input that sits next to this script and strip the
# ">..." description lines and all newlines, leaving one long sequence string.
fname = File.join(File.dirname(__FILE__), "fasta.input")
seq = File.read(fname)
seq.gsub!(/>.*\n|\n/, "")

# Collects every match of +regex+ in +string+ using only the engine's
# one-argument +match+ method, so the identical code path is exercised for
# every engine being benchmarked.
#
# The search restarts one character after the *start* of the previous match,
# so overlapping matches are reported too.
#
# string - the String to search.
# regex  - any object responding to +match(str)+ and returning nil or an
#          object that responds to +begin(0)+ and +to_a+.
#
# Returns an Array with one +match.to_a+ entry per match, in order of
# occurrence (empty when nothing matches).
def fair_scan(string, regex)
  result = []
  idx = -1 # absolute index where the latest match started; -1 = none yet

  loop do
    # Slice from just past the previous match start to the end of the string.
    rest = string[(idx + 1)..-1]
    # A range starting beyond the end of the string yields nil (only reachable
    # after a zero-length match at the very end); stop instead of handing nil
    # to the engine.
    break unless rest

    match = regex.match(rest)
    break unless match

    # match.begin(0) is relative to +rest+; convert it to an absolute index.
    idx += match.begin(0) + 1
    result << match.to_a
  end

  result
end

# Single source of truth for the DNA patterns, so that all three engines are
# guaranteed to be benchmarked against exactly the same expressions.
patterns = [
  'agggtaaa|tttaccct',
  '[cgt]gggtaaa|tttaccc[acg]',
  'a[act]ggtaaa|tttacc[agt]t',
  'ag[act]gtaaa|tttac[agt]ct',
  'agg[act]taaa|ttta[agt]cct',
  'aggg[acg]aaa|ttt[cgt]ccct',
  'agggt[cgt]aa|tt[acg]accct',
  'agggta[cgt]a|t[acg]taccct',
  'agggtaa[cgt]|[acg]ttaccct'
]

# One case-insensitive compiled regex per pattern, per engine.
regexes       = patterns.map { |pat| Regexp.new(pat, Regexp::IGNORECASE) }
boost_regexes = patterns.map { |pat| Boost::Regexp.new(pat, Boost::Regexp::IGNORECASE) }
oni_regexes   = patterns.map { |pat| ORegexp.new(pat, :options => OPTION_IGNORECASE) }

# Number of repetitions of each workload inside a single benchmark report.
iterations = 100

puts "DNA-Matching (Computer Language Shootout)"
puts "========================================="
Benchmark.bmbm do |x|
  x.report("Normal regex") { iterations.times { regexes.each { |re| fair_scan(seq, re) } } }
  x.report("Oniguruma") { iterations.times { oni_regexes.each { |re| fair_scan(seq, re) } } }
  x.report("Boost regex") { iterations.times { boost_regexes.each { |re| fair_scan(seq, re) } } }
end

# Second workload: a phone-number pattern searched for in generated text.
# Single-quoted so the backslashes reach each regex engine untouched.
phone_pattern = '\d{3}-\d{3}-\d{4}'
reg = Regexp.new(phone_pattern)
boost_reg = Boost::Regexp.new(phone_pattern)
oni_reg = ORegexp.new(phone_pattern)

text = Lorem::Base.new('paragraphs', 200).output

puts ""
puts "Failing to match a phone number in a big string of text"
puts "======================================================="
Benchmark.bmbm do |x|
  x.report("Normal regex") { iterations.times { fair_scan(text, reg) } }
  x.report("Oniguruma") { iterations.times { fair_scan(text, oni_reg) } }
  x.report("Boost regex") { iterations.times { fair_scan(text, boost_reg) } }
end