lib/utils/xml.rb in metanorma-utils-1.8.5 vs lib/utils/xml.rb in metanorma-utils-1.9.0
- old
+ new
@@ -60,9 +60,41 @@
.gsub("—", "\u0097").gsub("–", "\u0096")
.gsub("—", "\u0097")
end
end
# Builds an XML fragment through Nokogiri::XML::Builder and returns its
# serialisation as an array of lines, post-processed by line_sanitise.
#
# @param _script [String] not referenced anywhere in this method body
# @yield block handed to Nokogiri::XML::Builder.with to populate the fragment
# @return [Array<String>] the right-stripped lines, as returned by
#   line_sanitise
def noko(_script = "Latn", &block)
  container = ::Nokogiri::XML.parse(NOKOHEAD).fragment("")
  ::Nokogiri::XML::Builder.with(container, &block)
  serialised = container.to_xml(
    encoding: "UTF-8", indent: 0,
    save_with: Nokogiri::XML::Node::SaveOptions::AS_XML
  )
  # Each line loses its trailing whitespace, then en dashes become U+0096
  # and em dashes become U+0097.
  # NOTE(review): as rendered here the second pair of substitutions repeats
  # the first and so cannot match anything; presumably the two pairs target
  # different spellings of the dashes in the repository source - TODO confirm.
  cleaned = serialised.lines.map do |line|
    line.rstrip
      .gsub("–", "\u0096").gsub("—", "\u0097")
      .gsub("–", "\u0096").gsub("—", "\u0097")
  end
  line_sanitise(cleaned)
end
+
# Prepares serialised lines so that they can be concatenated without
# line breaks.
#
# By default, a line break in the source translates to whitespace, but
# between two CJK characters it does not. We do not want line breaks in the
# final output because of those CJK complications, so every line that is
# followed by another line gets a trailing space, unless the text on both
# sides of the boundary is CJK.
#
# Only boundaries between adjacent lines are considered: the final line has
# no successor and is left untouched, which is consistent with a one-line
# array being returned unchanged.
#
# @param ret [Array<String>] lines to adjust; elements are replaced in place
# @return [Array<String>] the same array object
def line_sanitise(ret)
  cjk = /#{CJK}/o
  # Stop one short of the last index so that ret[i + 1] always exists.
  (0...(ret.size - 1)).each do |i|
    last = firstchar_xml(ret[i].reverse)
    nextfirst = firstchar_xml(ret[i + 1])
    next if cjk.match?(last) && cjk.match?(nextfirst)

    ret[i] += " "
  end
  ret
end
+
# Returns the first character of a line that is not part of leading markup.
#
# Leading tags are skipped both in their normal orientation ("<b>") and
# mirrored (">b/<"), because line_sanitise passes a reversed line to find
# the last text character of that line. If nothing follows the leading tags,
# the first character of the line itself is returned.
#
# @param line [String, nil] a single line of serialised XML, possibly reversed
# @return [String] the character found, or "" for nil or empty input
def firstchar_xml(line)
  m = /^(?:<[^>]+>|>[^<]+<)*(.)/.match(line.to_s)
  m ? m[1] : ""
end
+
def noko_html(&block)
doc = ::Nokogiri::XML.parse(NOKOHEAD)
fragment = doc.fragment("")
::Nokogiri::XML::Builder.with fragment, &block
fragment.to_xml(encoding: "UTF-8", indent: 0,