module EffectiveTruncateHtmlHelper
  # Truncates HTML or plain text based on its visible (inner) text.
  #
  # The markup of the result may be much longer than the limit, but with an
  # Integer limit the displayed text will be no longer than
  # (length + omission) characters.
  #
  # text              - String of HTML or plain text; parsed as an HTML fragment.
  # length_or_content - Integer: maximum number of inner-text characters to keep.
  #                     String: marker text; everything from the first leaf node
  #                     containing the marker onwards is cut. The marker is parsed
  #                     as HTML and the inner text of its first node is used; if
  #                     that fails the raw string is used as-is.
  # omission          - String appended where a text node is cut short.
  #
  # Returns the truncated fragment serialized with inner_html.
  # Raises RuntimeError when length_or_content is neither a String nor an Integer.
  def truncate_html(text, length_or_content = 200, omission = '...')
    fragment = Nokogiri::HTML::DocumentFragment.parse(text)

    case length_or_content
    when String
      marker =
        begin
          Nokogiri::HTML::DocumentFragment.parse(length_or_content).children.first.inner_text
        rescue StandardError
          # Best effort: e.g. an empty marker has no first node to read from.
          length_or_content
        end
      _truncate_node_to_content(fragment, marker, omission)
    when Integer
      _truncate_node_to_length(fragment, length_or_content, omission)
    else
      raise 'Unsupported datatype passed to second argument of truncate_html. Expecting integer or string.'
    end

    fragment.inner_html
  end

  # Walks +node+ depth-first looking for the first leaf whose content includes
  # +content+. That leaf is cut just before the marker, and every node visited
  # afterwards is removed.
  #
  # node     - node to process (mutated in place).
  # content  - String marker to search for.
  # omission - String appended to the leaf that gets cut.
  # seen     - true once the marker has been found earlier in the walk.
  #
  # Returns true if the marker has been found so far, false otherwise.
  def _truncate_node_to_content(node, content, omission, seen = false)
    if seen
      # Everything after the marker is dropped.
      node.remove
    elsif node.children.empty?
      index = node.content.index(content)
      if index
        parent = node.parent
        if parent && parent.content == content
          # The parent holds nothing but the marker text, so drop the parent too.
          parent.remove
        elsif index.zero?
          # The marker starts this leaf: nothing in it is worth keeping.
          node.remove
        else
          _truncate_text_node(node, index, omission)
        end
        seen = true
      end
    else
      node.children.each do |child|
        seen = _truncate_node_to_content(child, content, omission, seen)
      end
    end

    seen
  end

  # Trims +node+ in place so that its inner text fits within +length+
  # characters (plus the omission on the text node that gets cut).
  #
  # node     - node to process (mutated in place).
  # length   - Integer number of inner-text characters still allowed.
  # omission - String appended to the text node that gets cut.
  def _truncate_node_to_length(node, length, omission)
    # Base case: this subtree already fits.
    return if node.inner_text.length <= length

    if node.name == 'a'
      # Never cut a link in half; drop it entirely.
      node.remove
    elsif node.children.empty?
      if node.text?
        _truncate_text_node(node, length, omission)
      else
        node.remove
      end
    else
      # Keep children until the running total exceeds the limit, drop the rest.
      # The comparison is strictly greater: a child is still kept when the
      # total equals +length+, and is then cut with a budget of 0 below.
      kept_length = 0
      node.children.each do |child|
        if kept_length > length
          child.remove
        else
          kept_length += child.inner_text.length
        end
      end

      # Only the last remaining child overflows; give it whatever budget the
      # children before it left over.
      last_child = node.children.last
      budget = length - (kept_length - last_child.inner_text.length)
      _truncate_node_to_length(last_child, budget, omission)
    end
  end

  # Replaces the content of +node+ with its text cut to +length+ characters at
  # a word boundary, followed by +omission+.
  #
  # NOTE(review): assumes `truncate` is ActionView::Helpers::TextHelper#truncate,
  # which HTML-escapes its result unless escape: false is given. The node
  # content is escaped again when the fragment is serialized, so escaping here
  # would turn "&" into "&amp;amp;" in the output.
  def _truncate_text_node(node, length, omission)
    node.content = truncate(
      node.content,
      length: length + omission.to_s.length,
      separator: ' ',
      omission: omission,
      escape: false
    )
  end
end