Rakefile in buftok-0.1 vs Rakefile in buftok-0.2.0

- old
+ new

@@ -1,31 +1,66 @@
-require 'rake'
-require 'rake/rdoctask'
-require 'rake/gempackagetask'
-require 'spec/rake/spectask'
+require 'bundler'
+require 'rdoc/task'
+require 'rake/testtask'

-Spec::Rake::SpecTask.new(:spec) do |task|
-  task.spec_files = FileList['**/*_spec.rb']
-end
+task :default => :test

-Rake::RDocTask.new(:rdoc) do |task|
-  task.rdoc_dir = 'doc'
-  task.title = 'BufferedTokenizer'
-  task.rdoc_files.include('lib/**/*.rb')
+Bundler::GemHelper.install_tasks
+
+RDoc::Task.new do |task|
+  task.rdoc_dir = 'doc'
+  task.title = 'BufferedTokenizer'
+  task.rdoc_files.include('lib/**/*.rb')
 end

-spec = Gem::Specification.new do |s|
-  s.name = %q{buftok}
-  s.version = "0.1"
-  s.date = %q{2006-12-18}
-  s.summary = %q{BufferedTokenizer extracts token delimited entities from a sequence of arbitrary inputs}
-  s.email = %q{tony@clickcaster.com}
-  s.homepage = %q{http://buftok.rubyforge.org}
-  s.rubyforge_project = %q{buftok}
-  s.has_rdoc = true
-  s.authors = ["Tony Arcieri","Martin Emde"]
-  s.files = ["Rakefile", "lib", "lib/buftok.rb"]
+Rake::TestTask.new :test do |t|
+  t.libs << 'lib'
+  t.test_files = FileList['test/**/*.rb']
 end

-Rake::GemPackageTask.new(spec) do |pkg|
-  pkg.need_tar = true
+desc "Benchmark the current implementation"
+task :bench do
+  require 'benchmark'
+  require File.expand_path('lib/buftok', File.dirname(__FILE__))
+
+  n = 50000
+  delimiter = "\n\n"
+
+  frequency1 = 1000
+  puts "generating #{n} strings, with #{delimiter.inspect} every #{frequency1} strings..."
+  data1 = (0...n).map do |i|
+    (((i % frequency1 == 1) ? "\n" : "") +
+      ("s" * i) +
+      ((i % frequency1 == 0) ? "\n" : "")).freeze
+  end
+
+  frequency2 = 10
+  puts "generating #{n} strings, with #{delimiter.inspect} every #{frequency2} strings..."
+  data2 = (0...n).map do |i|
+    (((i % frequency2 == 1) ? "\n" : "") +
+      ("s" * i) +
+      ((i % frequency2 == 0) ? "\n" : "")).freeze
+  end
+
+  Benchmark.bmbm do |x|
+    x.report("1 char, freq: #{frequency1}") do
+      bt1 = BufferedTokenizer.new
+      n.times { |i| bt1.extract(data1[i]) }
+    end
+
+    x.report("2 char, freq: #{frequency1}") do
+      bt2 = BufferedTokenizer.new(delimiter)
+      n.times { |i| bt2.extract(data1[i]) }
+    end
+
+    x.report("1 char, freq: #{frequency2}") do
+      bt3 = BufferedTokenizer.new
+      n.times { |i| bt3.extract(data2[i]) }
+    end
+
+    x.report("2 char, freq: #{frequency2}") do
+      bt4 = BufferedTokenizer.new(delimiter)
+      n.times { |i| bt4.extract(data2[i]) }
+    end
+
+  end
 end