Rakefile in buftok-0.1 vs Rakefile in buftok-0.2.0
- old
+ new
@@ -1,31 +1,66 @@
-require 'rake'
-require 'rake/rdoctask'
-require 'rake/gempackagetask'
-require 'spec/rake/spectask'
+require 'bundler'
+require 'rdoc/task'
+require 'rake/testtask'
-Spec::Rake::SpecTask.new(:spec) do |task|
- task.spec_files = FileList['**/*_spec.rb']
-end
+task :default => :test
-Rake::RDocTask.new(:rdoc) do |task|
- task.rdoc_dir = 'doc'
- task.title = 'BufferedTokenizer'
- task.rdoc_files.include('lib/**/*.rb')
+Bundler::GemHelper.install_tasks
+
+RDoc::Task.new do |task|
+ task.rdoc_dir = 'doc'
+ task.title = 'BufferedTokenizer'
+ task.rdoc_files.include('lib/**/*.rb')
end
-spec = Gem::Specification.new do |s|
- s.name = %q{buftok}
- s.version = "0.1"
- s.date = %q{2006-12-18}
- s.summary = %q{BufferedTokenizer extracts token delimited entities from a sequence of arbitrary inputs}
- s.email = %q{tony@clickcaster.com}
- s.homepage = %q{http://buftok.rubyforge.org}
- s.rubyforge_project = %q{buftok}
- s.has_rdoc = true
- s.authors = ["Tony Arcieri","Martin Emde"]
- s.files = ["Rakefile", "lib", "lib/buftok.rb"]
+Rake::TestTask.new :test do |t|
+ t.libs << 'lib'
+ t.test_files = FileList['test/**/*.rb']
end
-Rake::GemPackageTask.new(spec) do |pkg|
- pkg.need_tar = true
+desc "Benchmark the current implementation"
+task :bench do
+ require 'benchmark'
+ require File.expand_path('lib/buftok', File.dirname(__FILE__))
+
+ n = 50000
+ delimiter = "\n\n"
+
+ frequency1 = 1000
+ puts "generating #{n} strings, with #{delimiter.inspect} every #{frequency1} strings..."
+ data1 = (0...n).map do |i|
+ (((i % frequency1 == 1) ? "\n" : "") +
+ ("s" * i) +
+ ((i % frequency1 == 0) ? "\n" : "")).freeze
+ end
+
+ frequency2 = 10
+ puts "generating #{n} strings, with #{delimiter.inspect} every #{frequency2} strings..."
+ data2 = (0...n).map do |i|
+ (((i % frequency2 == 1) ? "\n" : "") +
+ ("s" * i) +
+ ((i % frequency2 == 0) ? "\n" : "")).freeze
+ end
+
+ Benchmark.bmbm do |x|
+ x.report("1 char, freq: #{frequency1}") do
+ bt1 = BufferedTokenizer.new
+ n.times { |i| bt1.extract(data1[i]) }
+ end
+
+ x.report("2 char, freq: #{frequency1}") do
+ bt2 = BufferedTokenizer.new(delimiter)
+ n.times { |i| bt2.extract(data1[i]) }
+ end
+
+ x.report("1 char, freq: #{frequency2}") do
+ bt3 = BufferedTokenizer.new
+ n.times { |i| bt3.extract(data2[i]) }
+ end
+
+ x.report("2 char, freq: #{frequency2}") do
+ bt4 = BufferedTokenizer.new(delimiter)
+ n.times { |i| bt4.extract(data2[i]) }
+ end
+
+ end
end