Sha256: a240126dffe86a9333f919f4e5a48a1cbcd7af7663b83972469845e2acde5140

Contents?: true

Size: 1.3 KB

Versions: 7

Compression:

Stored size: 1.3 KB

Contents

=begin rdoc
html2text that works with Nokogiri
=end
module WWMD

  # Tag-name lists used by Page#html2text to decide how the text found
  # directly inside a tag is laid out in the plain-text rendering.

  # Tags treated as inline.  Page#html2text does not consult this list: any
  # parent tag that is not in one of the lists below is rendered inline
  # (stripped text, no newline appended).
  INLINETAGS =  ['a','abbr','acronym','address','b','bdo','big','cite',
                 'code','del','dfn','em','font','i','ins','kbd','label',
                 'noframes','noscript','q','s','samp','small','span',
                 'strike','strong','sub','sup','td','th','tt','u',
                 'html','body','table'].freeze

  # Text inside these tags is followed by a newline.
  BLOCKTAGS =   ['blockquote','center','dd','div','fieldset','form',
                 'h1','h2','h3','h4','h5','h6','p','pre','tr','var'].freeze

  # List containers; text directly inside them is followed by a newline.
  LISTTAGS =    ['dir','dl','menu','ol','ul'].freeze

  # List items; their text is prefixed with "* " and followed by a newline.
  ITEMTAGS =    ['li','dt'].freeze

  # Elements that produce output on their own: 'br' becomes a newline and
  # 'hr' becomes a 72-character dashed rule on its own line.
  SPECIALTAGS = ['br','hr'].freeze

  class Page
    # Render the page as plain, printable-ASCII text.
    #
    # Walks <tt>scrape.hdoc</tt> with +traverse+ (presumably a Nokogiri
    # document, per this file's header -- confirm against the caller) and
    # collects every text node together with the name of its parent tag, plus
    # a marker for each 'br' / 'hr' element.  Each collected text is stripped
    # of surrounding whitespace and laid out according to the tag lists
    # defined in WWMD.
    #
    # Finally every character outside \x20-\x7e other than newline is removed
    # and empty lines (including leading ones) are dropped.
    #
    # Returns the rendered text as a String.
    def html2text
      # [tag name, text] pairs in traversal order.
      pieces = []
      scrape.hdoc.traverse do |node|
        pieces << [node.parent.name, node.text] if node.text?
        pieces << [node.name, ""] if node.elem? && SPECIALTAGS.include?(node.name)
      end

      # Append into a single mutable buffer rather than rebuilding the whole
      # string with += for every piece.
      text = "".dup
      pieces.each do |tag, str|
        case tag
        when "br"
          text << "\n"
        when "hr"
          text << "\n" << ("-" * 72) << "\n"
        when *BLOCKTAGS, *LISTTAGS
          text << str.strip << "\n"
        when *ITEMTAGS
          text << "* " << str.strip << "\n"
        else
          text << str.strip
        end
      end

      # Drop everything that is not printable ASCII or a newline, then remove
      # empty lines.  Removing empty lines also collapses runs of newlines, so
      # no separate "squeeze newlines" pass is needed (the original had one
      # whose result was discarded).
      text.gsub(/[^\x20-\x7e\n]/, "").gsub(/^\n/, "")
    end
  end
end

Version data entries

7 entries across 7 versions & 3 rubygems

Version Path
iZsh-wwmd-0.2.19 lib/wwmd/page/html2text_nokogiri.rb
miketracy-wwmd-0.2.14 lib/wwmd/nokogiri_html2text.rb
miketracy-wwmd-0.2.15 lib/wwmd/nokogiri_html2text.rb
miketracy-wwmd-0.2.16 lib/wwmd/nokogiri_html2text.rb
miketracy-wwmd-0.2.17 lib/wwmd/page/html2text_nokogiri.rb
miketracy-wwmd-0.2.19 lib/wwmd/page/html2text_nokogiri.rb
wwmd-0.2.20.3 lib/wwmd/page/html2text_nokogiri.rb