Sha256: baf3207d0dc5918e432e5948b421d3859274e2c53e1c69f756b1f0bd0b34101c

Contents?: true

Size: 1 KB

Versions: 12

Compression:

Stored size: 1 KB

Contents

require 'nokogiri'
require 'stringio'

module TextRank
  module CharFilter
    ##
    # Character filter to remove HTML tags and convert HTML entities to text.
    #
    # The filter is itself a Nokogiri SAX document: the HTML SAX parser is run
    # over the input and only the character data it reports is collected.
    #
    # = Example
    #
    #  StripHtml.new.filter!("&quot;Optimism&quot;, said Cacambo, &quot;What is that?&quot;")
    #  => "\"Optimism\", said Cacambo, \"What is that?\""
    #
    #  StripHtml.new.filter!("<b>Alas! It is the <u>obstinacy</u> of maintaining that everything is best when it is worst.</b>")
    #  => "Alas! It is the obstinacy of maintaining that everything is best when it is worst."
    #
    # NOTE: every chunk of character data is written preceded by a single
    # space (see +characters+), so the raw return value contains additional
    # spaces (e.g. a leading one) that the examples above omit.
    ##
    class StripHtml < Nokogiri::XML::SAX::Document

      def initialize
        @text = StringIO.new
      end

      # Perform the filter
      #
      # A fresh buffer is allocated on every call. Merely rewinding the
      # previous buffer would leave the tail of an earlier, longer result in
      # place, and would also mutate the String handed back by an earlier call
      # (StringIO#string returns the underlying object, not a copy).
      #
      # @param text [String] HTML (or plain text) to strip
      # @return [String] the collected character data
      def filter!(text)
        @text = StringIO.new
        Nokogiri::HTML::SAX::Parser.new(self).parse(text)
        @text.string
      end

      protected

      # SAX callback invoked by the parser for each run of character data.
      #
      # A space is written before every chunk so that text from adjacent
      # elements does not run together once the tags are dropped.
      #
      # @param string [String] character data reported by the parser
      def characters(string)
        @text << ' '
        @text << string
      end

    end
  end
end

Version data entries

12 entries across 12 versions & 1 rubygem

Version Path
text_rank-1.3.0 lib/text_rank/char_filter/strip_html.rb
text_rank-1.2.9 lib/text_rank/char_filter/strip_html.rb
text_rank-1.2.5 lib/text_rank/char_filter/strip_html.rb
text_rank-1.2.4 lib/text_rank/char_filter/strip_html.rb
text_rank-1.2.3 lib/text_rank/char_filter/strip_html.rb
text_rank-1.2.2 lib/text_rank/char_filter/strip_html.rb
text_rank-1.2.0 lib/text_rank/char_filter/strip_html.rb
text_rank-1.1.7 lib/text_rank/char_filter/strip_html.rb
text_rank-1.1.6 lib/text_rank/char_filter/strip_html.rb
text_rank-1.1.5 lib/text_rank/char_filter/strip_html.rb
text_rank-1.1.1 lib/text_rank/char_filter/strip_html.rb
text_rank-1.1.0 lib/text_rank/char_filter/strip_html.rb