Sha256: 34495af28e765b8529b1646080534b38aea4c5abe53c1cb1074751c4264ce13b

Contents?: true

Size: 1.83 KB

Versions: 5

Compression:

Stored size: 1.83 KB

Contents

#!/usr/bin/env ruby
# -*- coding: utf-8 -*-

$: << File.join(File.dirname(__FILE__))
$: << File.join(File.dirname(__FILE__), '..', 'lib')

require 'wp2txt'
require 'wp2txt/utils'
include Wp2txt
require 'benchmark'

data_dir = File.join(File.dirname(__FILE__), '..', "data")

parent = Wp2txt::CmdProgbar.new
input_file = File.join(data_dir, "testdata.bz2")
output_dir = data_dir
tfile_size = 10
convert = true
strip_tmarker = true

Benchmark.bm do |x|
  x.report do
    wpconv = Wp2txt::Runner.new(parent, input_file, output_dir, tfile_size, convert, strip_tmarker)
    wpconv.extract_text do |article|
      format_wiki!(article.title)
      title = "[[#{article.title}]]\n"
      convert_characters!(title)

      contents = "\nCATEGORIES: "
      contents += article.categories.join(", ")
      contents += "\n\n"

      article.elements.each do |e|
        case e.first
        when :mw_heading
          format_wiki!(e.last)
          line = e.last
        when :mw_paragraph
          format_wiki!(e.last)
          line = e.last
        when :mw_table, :mw_htable
          format_wiki!(e.last)
          line = e.last
        when :mw_pre
          line = e.last
        when :mw_quote
          format_wiki!(e.last)
          line = e.last
        when :mw_unordered, :mw_ordered, :mw_definition
          format_wiki!(e.last)
          line = e.last
        when :mw_redirect
          format_wiki!(e.last)
          line = e.last
          line += "\n\n"
        else
          next
        end
        contents << line
      end
      format_article!(contents)
      convert_characters!(contents)

      ##### cleanup #####
      if /\A\s*\z/m =~ contents
        result = ""
      else
        result = title + "\n" + contents
      end
      result = result.gsub(/\[ref\]\s*\[\/ref\]/m){""}
      result = result.gsub(/\n\n\n+/m){"\n\n"} + "\n"  
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
wp2txt-0.8.0 bin/benchmark.rb
wp2txt-0.7.8 bin/benchmark.rb
wp2txt-0.7.7 bin/benchmark.rb
wp2txt-0.7.6 bin/benchmark.rb
wp2txt-0.7.5 bin/benchmark.rb