# frozen_string_literal: true

module IEV
  module Converter
    # Rewrites MathML and simple HTML formatting embedded in a string into
    # `stem:[...]` macros and lightweight inline markup.
    #
    # Relies on Nokogiri for HTML parsing, on Plurimath for the MathML to
    # AsciiMath translation, and on a String#decode_html method that is
    # presumably supplied by the DataConversions refinement (TODO confirm).
    class MathmlToAsciimath
      using DataConversions

      # Greek letters and the names substituted for them. Every key is a
      # single character and no name contains a key, so one pass over the
      # input yields the same result as replacing the letters one by one.
      GREEK_NAMES = {
        "α" => "alpha",
        "β" => "beta",
        "γ" => "gamma",
        "Γ" => "Gamma",
        "δ" => "delta",
        "Δ" => "Delta",
        "ε" => "epsilon",
        "ϵ" => "varepsilon",
        "ζ" => "zeta",
        "η" => "eta",
        "θ" => "theta",
        "Θ" => "Theta",
        "ϑ" => "vartheta",
        "ι" => "iota",
        "κ" => "kappa",
        "λ" => "lambda",
        "Λ" => "Lambda",
        "μ" => "mu",
        "ν" => "nu",
        "ξ" => "xi",
        "Ξ" => "Xi",
        "π" => "pi",
        "Π" => "Pi",
        "ρ" => "rho",
        "σ" => "sigma",
        "Σ" => "Sigma",
        "τ" => "tau",
        "υ" => "upsilon",
        "φ" => "phi",
        "Φ" => "Phi",
        "ϕ" => "varphi",
        "χ" => "chi",
        "ψ" => "psi",
        "Ψ" => "Psi",
        "ω" => "omega",
      }.freeze

      # Same table with every name wrapped in a stem macro.
      GREEK_STEMS = GREEK_NAMES.transform_values do |name|
        "stem:[#{name}]"
      end.freeze

      # Matches any single letter listed in GREEK_NAMES.
      GREEK_PATTERN = Regexp.union(GREEK_NAMES.keys).freeze

      # Opening `math` tag, optionally namespace-prefixed. Deliberately loose:
      # a false positive only costs one extra parse, because the MathML path
      # ends by running the HTML path anyway.
      MATH_TAG = /<(?:[\w-]+:)?math\b/i.freeze

      private_constant :GREEK_NAMES, :GREEK_STEMS, :GREEK_PATTERN, :MATH_TAG

      # Convenience wrapper around {#convert} on a fresh instance.
      #
      # @param input [String, nil] text that may contain MathML/HTML
      # @return [String, nil] converted text
      def self.convert(input)
        new.convert(input)
      end

      # @param input [String, nil] text that may contain MathML/HTML
      # @return [String, nil] converted text; +input+ itself when it is nil or
      #   contains neither '<' nor '&'
      def convert(input)
        mathml_to_asciimath(input)
      end

      private

      # Replaces every +math+ element with a stem macro holding its AsciiMath
      # form, then hands the result to {#html_to_asciimath}.
      def mathml_to_asciimath(input)
        # If given string does not include '<' (for elements) nor '&'
        # (for entities), then it's certain that it doesn't contain
        # any MathML or HTML formula.
        return input unless input&.match?(/<|&/)

        # No <math> tag at all: only the HTML conversion is needed.
        return html_to_asciimath(input) unless input.match?(MATH_TAG)

        fragment = Nokogiri::HTML.fragment(input, "UTF-8")

        fragment.css("math").each do |math_element|
          asciimath = Plurimath::Math.parse(
            text_to_asciimath(math_element.to_xml), :mathml
          ).to_asciimath.strip

          if asciimath.empty?
            math_element.remove
          else
            math_element.replace "stem:[#{asciimath}]"
          end
        end

        html_to_asciimath(fragment.children.to_s)
      end

      # Rewrites +i+, +sub+, +sup+, +ol+/+ul+ list items and sans-serif +font+
      # elements, then converts remaining Greek letters to stem macros.
      #
      # @return [String, nil] +input+ unchanged when it is nil or empty
      def html_to_asciimath(input)
        return input if input.nil? || input.empty?

        fragment = Nokogiri::HTML.fragment(input, "UTF-8")

        replace_italics(fragment)
        wrap_elements(fragment, "sub", "~")
        wrap_elements(fragment, "sup", "^")
        flatten_list_items(fragment, "ol", ". ")
        flatten_list_items(fragment, "ul", "* ")

        # Replace sans-serif font with monospace
        fragment.css('font[style*="sans-serif"]').each do |font|
          font.replace "`#{font.text}`"
        end

        html_entities_to_stem(
          fragment.children.to_s
            .gsub(/\]stem:\[/, "") # fuse adjacent macros: stem:[a]stem:[b] => stem:[ab]
            .gsub(%r{</?[uo]l>}, ""), # drop the now-empty list wrappers
        )
      end

      # Turns each +i+ element into a stem macro when its decoded text is
      # 1..12 characters long, removes it when empty, and otherwise wraps the
      # text in underscores.
      def replace_italics(fragment)
        fragment.css("i").each do |italic|
          decoded = text_to_asciimath(italic.text)

          case decoded.length
          when 0 then italic.remove
          when 1..12 then italic.replace "stem:[#{decoded}]"
          else italic.replace "_#{decoded}_"
          end
        end
      end

      # Replaces every +tag+ element with its converted text surrounded by
      # +marker+; elements whose raw text is empty are removed.
      def wrap_elements(fragment, tag, marker)
        fragment.css(tag).each do |node|
          if node.text.empty?
            node.remove
          else
            node.replace "#{marker}#{text_to_asciimath(node.text)}#{marker}"
          end
        end
      end

      # Replaces every +li+ found under a +list_tag+ element with +bullet+
      # followed by the item's text.
      def flatten_list_items(fragment, list_tag, bullet)
        fragment.css(list_tag).each do |list|
          list.css("li").each do |item|
            item.replace "#{bullet}#{item.text}"
          end
        end
      end

      # Decodes +text+ with String#decode_html and spells out Greek letters.
      def text_to_asciimath(text)
        html_entities_to_asciimath(text.decode_html)
      end

      # Replaces each Greek letter in +input+ with its name (e.g. "alpha").
      def html_entities_to_asciimath(input)
        input.gsub(GREEK_PATTERN, GREEK_NAMES)
      end

      # Replaces each Greek letter in +input+ with a stem macro
      # (e.g. "stem:[alpha]").
      def html_entities_to_stem(input)
        input.gsub(GREEK_PATTERN, GREEK_STEMS)
      end
    end
  end
end