Sha256: 430951f2dd4f29cffc4a3e54469c589643cc8cfba4735d784e2fb581c7358775
Contents?: true
Size: 1.5 KB
Versions: 14
Compression:
Stored size: 1.5 KB
Contents
# frozen_string_literal: true

require "parslet"
require "parslet/convenience"
require_relative "parse"
require_relative "transform"
require_relative "constants"

module Plurimath
  class UnicodeMath
    # Prepares a UnicodeMath source string and turns it into a Math::Formula.
    #
    # Construction normalises the input (optional trailing "#label" split off,
    # hexadecimal HTML-entity encoding, escape clean-up); #parse then runs the
    # Parse grammar and the Transform over the normalised text.
    class Parser
      attr_accessor :text

      # Matches a double-quoted run containing "#", its hex entity "&#x23;",
      # or a literal "\\eqno".
      LABELED_TR_REGEX = /"([^"]*(#|&#x23;|\\\\eqno)[^"]*[^"]*|[^"]*(#|&#x23;|\\\\eqno)[^"]*[^"]*)"/

      # @param text [String] raw UnicodeMath input; it is not modified.
      def initialize(text)
        text = pre_processing(text)
        @text = HTMLEntities.new.encode(text, :hexadecimal)
        # The encoder emits hexadecimal entities for "&" and "\"" as well;
        # turn those two back into the literal characters.
        @text.gsub!("&#x26;", "&")
        @text.gsub!("&#x22;", "\"")
        # Drop everything between the (hex-encoded) U+2AF7 and U+2AF8 markers.
        @text.gsub!(/&#x2af7;.*&#x2af8;/, "")
        # Collapse doubled backslashes into a single one.
        @text.gsub!(/\\\\/, "\\")
        # Converting \u#{xxxx} encoding to &#x#{xxxx};
        @text.gsub!(/\\u([\da-fA-F]{1,5})\w{0,5}/) { "&#x#{Regexp.last_match(1)};" }
        @text.strip!
      end

      # Parses the normalised text.
      #
      # @return [Math::Formula] formula wrapping the transformed parse tree
      def parse
        tree = Parse.new.parse(text)
        # Only wrap the tree when a "#label" suffix was split off the input.
        tree = post_processing(tree) if @splitted
        Math::Formula.new(
          Array(
            Transform.new.apply(tree),
          ),
        )
      end

      private

      # Pairs the parse tree with the label captured by #pre_processing.
      def post_processing(tree)
        { labeled_tr_value: tree, labeled_tr_id: @splitted }
      end

      # Splits an optional trailing "#label" off the input.
      #
      # The text before the first separator "#" is returned; when at least one
      # separator is present, the text after the last one is stored in
      # @splitted. A "#" inside a ✎(...) group is not treated as a separator.
      #
      # @param text [String] raw input; left untouched (safe for frozen strings)
      # @return [String] the portion of the input to be parsed
      def pre_processing(text)
        @splitted = nil
        # Nothing to split. This also avoids calling a method on nil for inputs
        # such as "" (String#split yields an empty array there).
        return text unless text.include?("#")

        # NOTE(review): the original first statement here was the bare
        # expression `text unless text.include?("#") && !text.match?(LABELED_TR_REGEX)`
        # (no `return`), so the LABELED_TR_REGEX check never had any effect.
        # Enabling it would stop splitting inputs that contain a quoted "#";
        # confirm the intended behaviour before doing so.

        placeholder = "\"replacement\""
        # Temporarily mask "#" inside ✎(...) so it survives the split below.
        masked = text.gsub(/✎\(.*(\#).*\)/) do |group|
          group.gsub("#", placeholder)
        end
        parts = masked.split("#")
        @splitted = parts.last if parts.count > 1
        # parts is empty for inputs made only of "#"; fall back to "".
        parts.first.to_s.gsub(placeholder, "#")
      end
    end
  end
end
Version data entries
14 entries across 14 versions & 1 rubygems