# encoding: utf-8
module TruncateHtml
  # A String subclass that adds helpers for splitting HTML into tokens and
  # for classifying an individual token (tag, open tag, comment).
  class HtmlString < String

    # Tag names that open_tag? never reports as open tags.
    UNPAIRED_TAGS = %w(br hr img).freeze

    # original_html - the String content to wrap.
    def initialize(original_html)
      super(original_html)
    end

    # Splits the string into tokens using the private tokenizer regex.
    #
    # Each token has its newline characters removed and every remaining run
    # of whitespace collapsed to a single space. A token that consisted only
    # of newlines therefore comes back as an empty string.
    #
    # Returns an Array of HtmlString.
    def html_tokens
      scan(regex).map do |token|
        without_newlines = token.gsub(/\n/, '')
        HtmlString.new(without_newlines.gsub(/\s+/, ' '))
      end
    end

    # Returns true when the string contains something shaped like an HTML
    # tag (opening or closing) and is not an HTML comment.
    def html_tag?
      /<\/?[^>]+>/ === self && !html_comment?
    end

    # Returns true when the string contains a tag that is not a closing tag
    # and whose name does not start with one of UNPAIRED_TAGS or `script`
    # (matched case-insensitively).
    def open_tag?
      /<(?!(?:#{UNPAIRED_TAGS.join('|')}|script|\/))[^>]+>/i === self
    end

    # Returns true when the string contains an HTML comment (`<!-- ... -->`,
    # optionally with one whitespace character after the `<`).
    def html_comment?
      /<\s?!--.*-->/ === self
    end

    # Builds the closing tag for an opening tag, dropping any attributes,
    # e.g. `<div class="x">` becomes `</div>`.
    #
    # Returns a String with surrounding whitespace stripped.
    def matching_close_tag
      # \1 is the tag name captured by (\w+); the replacement must use it,
      # otherwise the result is always empty.
      gsub(/<(\w+)\s?.*>/, '</\1>').strip
    end

    private

    # Tokenizer pattern. Alternatives, in priority order:
    #   1. one or more whole `<script ...>...</script>` blocks (kept as one token;
    #      anchored on the opening tag so preceding markup is not swallowed)
    #   2. a single opening or closing tag
    #   3. a run of letters, word characters and the listed punctuation
    #   4. a run of whitespace
    #   5. a single punctuation character
    def regex
      /(?:<script.*>.*<\/script>)+|<\/?[^>]+>|[[[:alpha:]]\w\|`~!@#\$%^&*\(\)\-_\+=\[\]{}:;'",\.\/?]+|\s+|[[:punct:]]/
    end

  end
end