Sha256: 8c04e9133edb3215a74f3333708a2d17ef926ffdc21c4ef132d5f6e8bc613c5c

Contents?: true

Size: 938 Bytes

Versions: 1

Compression:

Stored size: 938 Bytes

Contents

# encoding: utf-8
module TruncateHtml
  class HtmlString < String

    UNPAIRED_TAGS = %w(br hr img).freeze

    def initialize(original_html)
      super(original_html)
    end

    def html_tokens
      scan(regex).map do |token|
        HtmlString.new(
          token.gsub(
            /\n/,'' #remove newline characters
          ).gsub(
            /\s+/, ' ' #clean out extra consecutive whitespace
          )
        )
      end
    end

    def html_tag?
      /<\/?[^>]+>/ === self && !html_comment?
    end

    def open_tag?
      /<(?!(?:#{UNPAIRED_TAGS.join('|')}|script|\/))[^>]+>/i === self
    end

    def html_comment?
      /<\s?!--.*-->/ === self
    end

    def matching_close_tag
      gsub(/<(\w+)\s?.*>/, '</\1>').strip
    end

    private
    def regex
      /(?:<script.*>.*<\/script>)+|<\/?[^>]+>|[[[:alpha:]]\w\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'",\.\/?]+|\s+|[[:punct:]]+|\P{P}/ #add \p{P}
    end

  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
truncate_html_chinese-0.1.0 lib/truncate_html/html_string.rb