Sha256: baf3207d0dc5918e432e5948b421d3859274e2c53e1c69f756b1f0bd0b34101c
Contents?: true
Size: 1 KB
Versions: 12
Compression:
Stored size: 1 KB
Contents
require 'nokogiri'
require 'stringio'

module TextRank
  module CharFilter
    ##
    # Character filter to remove HTML tags and convert HTML entities to text.
    #
    # Text is gathered through SAX callbacks. A single space is written ahead
    # of every text chunk the parser reports, so the raw result may contain
    # more whitespace than the (whitespace-normalized) examples below show.
    #
    # = Example
    #
    #  StripHtml.new.filter!("&quot;Optimism&quot;, said Cacambo, &quot;What is that?&quot;")
    #  => "\"Optimism\", said Cacambo, \"What is that?\""
    #
    #  StripHtml.new.filter!("<b>Alas! It is the <u>obstinacy</u> of maintaining that everything is best when it is worst.</b>")
    #  => "Alas! It is the obstinacy of maintaining that everything is best when it is worst."
    ##
    class StripHtml < Nokogiri::XML::SAX::Document
      def initialize
        # Buffer that #characters appends to; replaced at the start of each
        # #filter! call.
        @text = StringIO.new
      end

      # Perform the filter
      #
      # Parses +text+ as HTML with this object as the SAX document handler and
      # returns the text collected by #characters. This method does not itself
      # modify +text+; the result is a separate string.
      #
      # @param text [String] HTML markup to strip
      # @return [String] the collected character data
      def filter!(text)
        # Use a fresh buffer for every call. Merely rewinding a reused
        # StringIO resets the write position but keeps the old contents, so a
        # shorter result would retain the tail of the previous one, and the
        # string returned by an earlier call would be overwritten in place.
        @text = StringIO.new
        Nokogiri::HTML::SAX::Parser.new(self).parse(text)
        @text.string
      end

      protected

      # Handler for character data reported by the parser (this object is the
      # document passed to the SAX parser in #filter!).
      #
      # A space is written before each chunk so that text from adjacent
      # elements does not run together.
      #
      # @param string [String] a chunk of character data
      # @return [StringIO] the buffer being written to
      def characters(string)
        @text << ' ' << string
      end
    end
  end
end
Version data entries
12 entries across 12 versions & 1 rubygems