lib/isodoc/function/cleanup.rb in isodoc-2.5.6 vs lib/isodoc/function/cleanup.rb in isodoc-2.5.7
- old
+ new
@@ -28,36 +28,41 @@
# Breaks up long strings inside the text nodes of every table cell
# (td and th) found in docxml. Returns immediately, doing nothing, unless
# @break_up_urls_in_tables is set.
# Review note on this diff: the removed lines replace each text node with
# the HTMLEntities-encoded result of break_up_long_strings; the added lines
# assign the result of break_up_long_str to the node's content instead.
def table_long_strings_cleanup(docxml)
return unless @break_up_urls_in_tables
docxml.xpath("//td | //th").each do |d|
d.traverse do |n|
- next unless n.text?
-
- n.replace(HTMLEntities.new.encode(
- break_up_long_strings(n.text),
- ))
+ n.text? or next
+ ret = break_up_long_str(n.text)
+ n.content = ret
end
end
end
# Returns text with break points inserted into its long words.
# Text matching /^\s*$/ is returned unchanged. Otherwise the text is split
# in front of each whitespace character; pieces shorter than the threshold
# are kept as they are, longer pieces are scanned into chunks of at most the
# threshold length, and every chunk that reaches the threshold length is
# handed to the helper method before the chunks are joined back together.
# Review note on this diff: the removed lines hard-code a threshold of 30
# and call break_up_long_strings1(chunk); the added lines use
# LONGSTR_THRESHOLD (10) and additionally pass the 1-based chunk index to
# break_up_long_str1.
# NOTE(review): ^ and $ anchor per line in Ruby, so the guard also returns
# early for multi-line text that contains an empty line -- confirm that is
# intended (\A and \z would anchor the whole string).
# NOTE(review): the blank test inside the block checks `text`, which the
# guard has already established does not match, so that clause never fires;
# presumably `w` was intended -- confirm.
- def break_up_long_strings(text)
- return text if /^\s*$/.match?(text)
+ LONGSTR_THRESHOLD = 10
+ LONGSTR_NOPUNCT = 2
+ def break_up_long_str(text)
+ /^\s*$/.match?(text) and return text
text.split(/(?=\s)/).map do |w|
- if /^\s*$/.match(text) || (w.size < 30) then w
+ if /^\s*$/.match(text) || (w.size < LONGSTR_THRESHOLD) then w
else
- w.scan(/.{,30}/).map do |w1|
- w1.size < 30 ? w1 : break_up_long_strings1(w1)
+ w.scan(/.{,#{LONGSTR_THRESHOLD}}/o).map.with_index do |w1, i|
+ w1.size < LONGSTR_THRESHOLD ? w1 : break_up_long_str1(w1, i + 1)
end.join
end
end.join
end
# Inserts a break point into a single threshold-length chunk of a long word.
# The chunk is split after each occurrence of one of the characters
# , . ? + ; / = ( [   (the removed pattern did not include ( and [ ).
# When the split yields more than one piece, a break character is placed in
# front of the last piece and the pieces are joined again.
# Review note on this diff: the removed lines use a plain space as the break
# character and always append it when the split yields a single piece; the
# added lines use U+200B (zero width space) and, for a single piece, append
# it only when iteration is a multiple of LONGSTR_NOPUNCT, otherwise
# returning the chunk unchanged.
- def break_up_long_strings1(text)
- s = text.split(%r{(?<=[,.?+;/=])})
- if s.size == 1 then "#{text} "
+ # break on punct every LONGSTR_THRESHOLD chars
+ # break regardless every LONGSTR_THRESHOLD * LONGSTR_NOPUNCT
+ def break_up_long_str1(text, iteration)
+ s = text.split(%r{(?<=[,.?+;/=(\[])})
+ if s.size == 1
+ (iteration % LONGSTR_NOPUNCT).zero? and
+ text += "\u200b"
+ text
else
- s[-1] = " #{s[-1]}"
+ s[-1] = "\u200b#{s[-1]}"
s.join
end
end
def admonition_cleanup(docxml)