module Asciimath2UnitsML
class Conv
include Rsec::Helpers
# Loads a YAML data file, converts its String keys to Symbols and validates it.
#
# path - location of the YAML file, relative to the directory of this source file.
#
# Returns whatever validate_yaml returns for the symbolized contents.
def read_yaml(path)
  location = File.join(File.dirname(__FILE__), path)
  contents = symbolize_keys(YAML.load_file(location))
  validate_yaml(contents, path)
end
# Re-keys a collection of entries by each entry's symbolid.
#
# hash - key/entry pairs; every entry must respond to #name and #symbolid.
#
# Entries whose name is nil or empty are left out. When two entries share a
# symbolid, the later one wins. Returns a new Hash of symbolid => entry.
def flip_name_and_symbol(hash)
  by_symbol = {}
  hash.each do |_key, entry|
    label = entry.name
    next if label.nil? || label.empty?

    by_symbol[entry.symbolid] = entry
  end
  by_symbol
end
# Re-keys a collection of entries by every one of each entry's symbolids.
#
# hash - key/entry pairs; every entry must respond to #name and #symbolids.
#
# Entries whose name is nil or empty are left out. When a symbol id occurs
# more than once, the later entry wins. Returns a new Hash of id => entry.
def flip_name_and_symbols(hash)
  by_symbol = {}
  hash.each do |_key, entry|
    label = entry.name
    next if label.nil? || label.empty?

    entry.symbolids.each { |id| by_symbol[id] = entry }
  end
  by_symbol
end
# Runs the consistency checks over a loaded (symbolized) YAML database.
#
# hash - the loaded data, keyed by entry id.
# path - the relative path the data came from; it selects which checks apply.
#
# The quantities and dimensions files are returned without any checks. For
# every other file each entry goes through validate_symbols (sharing one
# "already seen" map across entries) and each of its :unit_symbols goes
# through validate_unit_symbol_cardinality; entries of the units file are
# additionally passed to validate_unit first.
#
# Returns hash itself, unmodified by this method.
def validate_yaml(hash, path)
  return hash if path == "../unitsdb/quantities.yaml" || path == "../unitsdb/dimensions.yaml"

  units_file = path == "../unitsdb/units.yaml"
  seen = {}
  hash.each do |key, entry|
    validate_unit(entry) if units_file
    validate_symbols(seen, entry)
    entry[:unit_symbols]&.each do |unit_symbol|
      validate_unit_symbol_cardinality(unit_symbol, key)
    end
  end
  hash
end
# Checks that the list-valued fields of a unit entry really are Arrays.
#
# v - Hash describing one unit.
#
# Returns nil when :unit_name is absent and true when it is an Array.
# Raises StandardError when :quantity_reference or :unit_name is present but
# is not an Array.
def validate_unit(v)
  refs = v[:quantity_reference]
  if refs && !refs.is_a?(Array)
    raise StandardError, "No quantity_reference array provided for unit: #{v}"
  end

  names = v[:unit_name]
  return nil unless names
  return true if names.is_a?(Array)

  raise StandardError, "No unit_name array provided for unit: #{v}"
end
# Registers the symbol(s) of entry v in m, insisting that each one is unique.
#
# m - Hash of symbol => entry accumulated so far; it is updated in place.
# v - Hash describing the entry being checked.
#
# The symbol "1" is exempt from the uniqueness check.
#
# Returns m.
# Raises StandardError when v yields no symbol, or when one of its symbols is
# already registered for another entry.
def validate_symbols(m, v)
  ids = symbol_key(v)
  raise StandardError, "No symbol provided for unit: #{v}" if ids.nil?

  Array(ids).each do |id|
    if m[id] && id != "1"
      raise StandardError, "symbol #{id} is not unique in #{v}: already used for #{m[id]}"
    end

    m[id] = v
  end
  m
end
# Checks that a unit_symbol record carries every required representation.
#
# us - Hash for one unit symbol, or nil (nil is accepted as is).
# k  - key of the owning entry; used only in the error message.
#
# Returns true when us is nil or has non-nil :id, :ascii, :html, :mathml,
# :latex and :unicode values.
# Raises StandardError otherwise.
def validate_unit_symbol_cardinality(us, k)
  return true if us.nil?

  required = %i[id ascii html mathml latex unicode]
  return true if required.none? { |field| us[field].nil? }

  raise StandardError, "malformed unit_symbol for #{k}: #{us}"
end
# Works out the symbol identifier(s) under which entry v is registered.
#
# v - Hash describing one entry.
#
# Returns an Array with one id per :unit_symbols element (read from "id" or
# :id) when v has :unit_symbols. Otherwise returns the :ascii member of
# v[:symbol] when that is a Hash with a truthy :ascii, else v[:symbol] itself
# (nil when the entry has no :symbol).
def symbol_key(v)
  unit_symbols = v[:unit_symbols]
  # The id may sit under a String or a Symbol key, so both are tried.
  return unit_symbols.map { |s| s["id"] || s[:id] } unless unit_symbols.nil?

  symbol = v[:symbol]
  # Look up :ascii only on a Hash. Hash#dig(:symbol, :ascii) raises TypeError
  # when :symbol is a plain String, which kept the scalar fallback from ever
  # being reached.
  ascii = symbol.is_a?(Hash) ? symbol[:ascii] : nil
  ascii || symbol
end
# Recursively converts String keys to Symbols throughout a parsed YAML structure.
#
# hash - the value to convert: a Hash, an Array, or any scalar.
#
# Hashes are rebuilt with String keys turned into Symbols (other keys are kept)
# and their values converted recursively; Arrays are converted element by
# element, at any nesting depth; everything else (Strings, numbers, booleans,
# nil, ...) is returned unchanged, so arrays of scalars no longer raise.
#
# Returns the converted copy; the input is not modified.
def symbolize_keys(hash)
  case hash
  when Hash
    hash.each_with_object({}) do |(key, value), result|
      new_key = key.is_a?(String) ? key.to_sym : key
      result[new_key] = symbolize_keys(value)
    end
  when Array
    hash.map { |item| symbolize_keys(item) }
  else
    hash
  end
end
# Builds the grammar for unit expressions from the keys of @prefixes and @units,
# using the `.r` and `seq` combinators (presumably supplied by Rsec::Helpers,
# which this class includes — confirm). Evaluates to `units.eof`.
def parser
# Prefix alternations, one for 2-character keys and one for 1-character keys.
# NOTE(review): the keys are interpolated into the regexp unescaped, and prefix
# keys of any other length are not matched by either pattern — confirm intended.
prefix2 = /#{@prefixes.keys.select { |x| x.size == 2 }.join("|")}/.r
prefix1 = /#{@prefixes.keys.select { |x| x.size == 1 }.join("|")}/.r
# Candidate unit symbols: drop keys containing "*", "^" or "/", the key "1",
# and units whose `prefixed` is truthy; regexp-escape what remains.
unit_keys = @units.keys.reject do |k|
/\*|\^|\/|^1$/.match(k) || @units[k].prefixed
end.map { |k| Regexp.escape(k) }
# Longest keys first, so the alternation offers longer symbols before shorter ones.
unit1 = /#{unit_keys.sort_by(&:length).reverse.join("|")}/.r
# Exponent: "^(N)" or "^N" with an optional minus sign; the mapped value keeps
# only the (signed) digits.
exponent = /\^\(-?\d+\)/.r.map { |m| m.sub(/\^/, "").gsub(/[()]/, "") } |
/\^-?\d+/.r.map { |m| m.sub(/\^/, "") }
# Multiplier: "*", "//" or "/". Only x[0] is kept, so (assuming the match is
# yielded as a String) "//" is recorded as "/".
multiplier = %r{\*|//|/}.r.map { |x| { multiplier: x[0] } }
# One unit term, as a hash with :prefix, :unit and :display_exponent. The
# alternatives are listed as: sqrt(...) forms (display_exponent "0.5"), an
# unprefixed unit followed by a multiplier or by end of input, units with a
# 1- or 2-character prefix, and finally the literal "1".
# NOTE(review): `_?`, `&`, `.eof` and `|` are Rsec combinators — presumably
# optional, lookahead, end-of-input and ordered choice; confirm against the
# Rsec documentation before reordering anything here.
unit =
seq("sqrt(", unit1, ")") { |x| { prefix: nil, unit: x[1], display_exponent: "0.5" } } |
seq("sqrt(", prefix1, unit1, ")") { |x| { prefix: x[1], unit: x[2], display_exponent: "0.5" } } |
seq("sqrt(", prefix2, unit1, ")") { |x| { prefix: x[1], unit: x[2], display_exponent: "0.5" } } |
seq(unit1, exponent._? & multiplier) { |x| { prefix: nil, unit: x[0], display_exponent: (x[1][0] )} } |
seq(unit1, exponent._?).eof { |x| { prefix: nil, unit: x[0], display_exponent: (x[1][0] )} } |
seq(prefix1, unit1, exponent._? ) { |x| { prefix: x[0], unit: x[1], display_exponent: (x[2][0] ) } } |
seq(prefix2, unit1, exponent._? ) { |x| { prefix: x[0], unit: x[1], display_exponent: (x[2][0] ) } } |
"1".r.map { |_| { prefix: nil, unit: "1", display_exponent: nil } }
# Whole expression: either a lone prefix followed by "-" (wrapped in a
# one-element array with unit: nil), or unit terms joined by multipliers.
units = seq(prefix2, "-") { |x| [{ prefix: x[0], unit: nil, display_exponent: nil }] } |
seq(prefix1, "-") { |x| [{ prefix: x[0], unit: nil, display_exponent: nil }] } |
unit.join(multiplier)
# The local `parser` is never read; the value of this assignment is the
# method's result.
parser = units.eof
end
# Parses one UnitsML expression string.
#
# x - the expression: a unit expression optionally followed by
#     comma-separated fields, which are handed on to postprocess.
#
# Only the part before the first comma is fed to @parser.
#
# Returns the result of postprocess for the parsed units and the split fields.
# Raises Rsec::SyntaxError when the parser yields nothing or an invalid result.
def parse(x)
  fields = x.split(/,\s*/)
  parsed = @parser.parse!(fields[0])
  unless parsed && !Rsec::INVALID[parsed]
    raise Rsec::SyntaxError.new "error parsing UnitsML expression", x, 1, 0
  end

  Rsec::Fail.reset
  postprocess(parsed, fields)
end
# Turns the raw parse result plus the comma-separated fields into the tuple
# consumed by the renderer.
#
# units - parsed unit/multiplier hashes, passed through postprocess1.
# text  - Array of fields; text[0] is the original unit expression, later
#         fields may start with "quantity:", "name:", "symbol:" or "multiplier:".
#
# Returns [units, text[0], normalized_text, quantity, name, symbol, multiplier];
# each of the last four is nil when no field with that label is present. The
# normalized text joins the entries from units_only with "*", appending
# "^exponent" unless the exponent is missing or "1".
def postprocess(units, text)
  units = postprocess1(units)
  options = text[1..-1]
  # First option matching `label`, with `lead` stripped from it; nil if none.
  pick = lambda do |label, lead|
    hit = options&.find { |opt| label.match(opt) }
    hit&.sub(lead, "")
  end
  quantity = pick.call(/^quantity:/, /^quantity:\s*/)
  name = pick.call(/^name:/, /^name:\s*/)
  symbol = pick.call(/^symbol:/, /^symbol:\s*/)
  multiplier = pick.call(/^multiplier:/, /^multiplier:\s*/)
  pieces = units_only(units).map do |u|
    power = u[:exponent]
    suffix = power && power != "1" ? "^#{power}" : ""
    "#{u[:prefix]}#{u[:unit]}#{suffix}"
  end
  [units, text[0], pieces.join("*"), quantity, name, symbol, multiplier]
end
# Fills in the signed :exponent of every unit entry, in place.
#
# units - sequence of hashes: unit entries (with :display_exponent) interleaved
#         with multiplier entries (with :multiplier).
#
# A "/" multiplier puts the entries that follow it into division mode until the
# next multiplier; in that mode the exponent is the display exponent
# (defaulting to "1") with a minus sign in front, and a resulting run of two or
# more leading minus signs is removed. Outside division mode the exponent is the
# display exponent unchanged.
#
# Returns a new Array holding the same (mutated) hashes in the same order.
def postprocess1(units)
  dividing = false
  units.map do |item|
    op = item[:multiplier]
    if op
      dividing = (op == "/")
    else
      shown = item[:display_exponent]
      signed = dividing ? "-#{shown || '1'}" : shown
      item[:exponent] = signed&.sub(/^--+/, "")
    end
    item
  end
end
# Converts an AsciiMath expression to MathML with UnitsML markup.
#
# expression - the AsciiMath source text.
#
# The expression is first rendered to MathML by asciimath2mathml, parsed as
# XML, run through MathML2UnitsML, and serialized again.
#
# Returns the resulting XML as a String.
def Asciimath2UnitsML(expression)
  mathml = asciimath2mathml(expression)
  MathML2UnitsML(Nokogiri::XML(mathml)).to_xml
end
# https://www.w3.org/TR/mathml-units/ section 2: delimit number Invisible-Times unit
# Replaces every MathML mtext element of the form "unitsml(...)" with the
# rendering of the unit plus its UnitsML description.
#
# xml - a parsed XML document, or a String that is parsed with Nokogiri first.
#
# For each matching mtext the text inside "unitsml(...)" is parsed; the
# rendering comes from the explicit symbol when one was given, otherwise from
# mathmlsymbol. The element is then replaced by the delimiter from delimspace,
# the rendering, a newline and the output of unitsml.
#
# Returns the document after dedup_ids has been applied to it.
def MathML2UnitsML(xml)
  xml = Nokogiri::XML(xml) if xml.is_a?(String)
  xml.xpath(".//m:mtext", "m" => MATHML_NS).each do |node|
    content = node.text
    next unless %r{^unitsml\(.+\)$}.match(content)

    expr = content.sub(%r{^unitsml\((.+)\)$}m, "\\1")
    units, origtext, normtext, quantity, name, symbol, multiplier = parse(expr)
    rendering =
      if symbol
        embeddedmathml(asciimath2mathml(symbol))
      else
        mathmlsymbol(units, false, multiplier)
      end
    delimiter = delimspace(rendering, node)
    description = unitsml(units, origtext, normtext, quantity, name)
    node.replace("#{delimiter}#{rendering}\n#{description}")
  end
  dedup_ids(xml)
end
# Chooses the delimiter string placed in front of a unit rendering.
#
# rendering - markup String for the unit.
# elem      - the node being replaced; its preceding siblings are inspected.
#
# Among elem's preceding siblings in the MathML namespace, takes the parent
# element of the last non-blank descendant text node (the last such element in
# the result set). When there is no such element, or it is not an mn or mi,
# the result is the empty string.
def delimspace(rendering, elem)
prec_text_elem = elem.xpath("./preceding-sibling::*[namespace-uri() = '#{MATHML_NS}']/"\
"descendant::text()[normalize-space()!=''][last()]/parent::*").last
return "" if prec_text_elem.nil? || !%w(mn mi).include?(prec_text_elem&.name)
# Text content of the rendering (parsed as XML), stripped and entity-encoded;
# it is used only for the letter/digit test below.
text = HTMLEntities.new.encode(Nokogiri::XML("#{rendering}").text.strip)
# NOTE(review): as written here both arms of this conditional appear to be the
# same string literal, so the letter/digit test would not change the result —
# confirm that no markup or invisible characters were lost from these literals.
/\p{L}|\p{N}/.match(text) ?
"" : ""
end
# Removes repeated UnitsML definitions that share an xml:id.
#
# xml - the parsed XML document; it is modified in place.
#
# For each of the Unit, Dimension, Prefix and Quantity element types in the
# UnitsML namespace, collects the distinct xml:id values, and for every id
# keeps the first element in the document carrying that xml:id and removes
# the others.
#
# Returns xml.
def dedup_ids(xml)
  %w(Unit Dimension Prefix Quantity).each do |tag|
    ids = xml.xpath(".//m:#{tag}/@xml:id", "m" => UNITSML_NS).map(&:text).uniq
    ids.each do |id|
      xml.xpath(".//*[@xml:id = '#{id}']").drop(1).each(&:remove)
    end
  end
  xml
end
def asciimath2mathml(expression)
AsciiMath::MathMLBuilder.new(:msword => true).append_expression(
AsciiMath.parse(HTMLEntities.new.decode(expression)).ast).to_s.
gsub(/