lib/utils/xml.rb in metanorma-utils-1.8.5 vs lib/utils/xml.rb in metanorma-utils-1.9.0

- old
+ new

@@ -60,9 +60,41 @@ .gsub("&#151;", "\u0097").gsub("&#x96;", "\u0096") .gsub("&#x97;", "\u0097") end end + def noko(_script = "Latn", &block) + fragment = ::Nokogiri::XML.parse(NOKOHEAD).fragment("") + ::Nokogiri::XML::Builder.with fragment, &block + ret = fragment + .to_xml(encoding: "UTF-8", indent: 0, + save_with: Nokogiri::XML::Node::SaveOptions::AS_XML) + .lines.map do |l| + l.rstrip.gsub("&#150;", "\u0096").gsub("&#151;", "\u0097") + .gsub("&#x96;", "\u0096").gsub("&#x97;", "\u0097") + end + line_sanitise(ret) + end + + # By default, carriage return in source translates to whitespace; + # but in CJK, it does not. We don't want carriage returns in the final + # output because of CJK complications + def line_sanitise(ret) + ret.size == 1 and return ret + (0...ret.size).each do |i| + last = firstchar_xml(ret[i].reverse) + nextfirst = firstchar_xml(ret[i + 1]) + /#{CJK}/o.match?(last) && /#{CJK}/o.match?(nextfirst) or + ret[i] += " " + end + ret + end + + def firstchar_xml(line) + m = /^(<[^>]+>)*(.)/.match(line) or return "" + m[2] + end + def noko_html(&block) doc = ::Nokogiri::XML.parse(NOKOHEAD) fragment = doc.fragment("") ::Nokogiri::XML::Builder.with fragment, &block fragment.to_xml(encoding: "UTF-8", indent: 0,