lib/ebnf/writer.rb in ebnf-2.1.0 vs lib/ebnf/writer.rb in ebnf-2.1.1

- old
+ new

@@ -6,10 +6,11 @@ ## # Serialize ruleset back to EBNF module EBNF class Writer LINE_LENGTH = 80 + LINE_LENGTH_HTML = 200 # ASCII escape names ASCII_ESCAPE_NAMES = [ "null", #x00 "start of heading", #x01 @@ -116,23 +117,25 @@ end if format == :ebnf && max_id > 0 lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt lhs_length += max_id + 3 end - rhs_length = LINE_LENGTH - lhs_length + rhs_length = (html ? LINE_LENGTH_HTML : LINE_LENGTH) - lhs_length if html # Output as formatted HTML begin require 'erubis' + require 'htmlentities' + @coder = HTMLEntities.new eruby = Erubis::Eruby.new(ERB_DESC) formatted_rules = rules.map do |rule| if rule.kind == :terminals || rule.kind == :pass OpenStruct.new(id: ("@#{rule.kind}"), sym: nil, assign: nil, - formatted: ("<strong>Productions for terminals</strong>" if rule.kind == :terminals)) + formatted: ("<strong># Productions for terminals</strong>" if rule.kind == :terminals)) else formatted_expr = self.send(format_meth, rule.expr) # Measure text without markup formatted_expr_text = formatted_expr.gsub(%r{</?\w+[^>]*>}, '') if formatted_expr_text.length > rhs_length && (format != :abnf || rule.alt?) @@ -149,11 +152,11 @@ (ndx > 0 ? '=/' : '=') else formatted.sub!(%r{\s*<code>\|</code>\s*}, '') (ndx > 0 ? (rule.alt? ? '|' : '') : '=') end - lines << OpenStruct.new(id: ("[#{rule.id}]" if rule.id), + lines << OpenStruct.new(id: ((ndx == 0 ? "[#{rule.id}]" : "") if rule.id), sym: (rule.sym if ndx == 0 || format == :abnf), assign: assign, formatted: formatted) end if format == :isoebnf @@ -169,11 +172,11 @@ end end.flatten out.write eruby.evaluate(format: format, rules: formatted_rules) return rescue LoadError - $stderr.puts "Generating HTML requires erubis gem to be loaded" + $stderr.puts "Generating HTML requires erubis and htmlentities gems to be loaded" end end # Format each rule, considering the available rhs size rules.each do |rule| @@ -214,11 +217,11 @@ # W3C EBNF Formatters ## # Format the expression part of a rule def format_ebnf(expr, sep: nil, embedded: false) - return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol) + return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol) if expr.is_a?(String) return expr.length == 1 ? format_ebnf_char(expr) : format_ebnf_string(expr, expr.include?('"') ? "'" : '"') end @@ -288,14 +291,14 @@ end # Format a single-character string, prefering hex for non-main ASCII def format_ebnf_char(c) case c.ord - when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"}) - when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'}) - when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"}) - when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"}) + when (0x21) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"}) + when 0x22 then (@options[:html] ? %('<code class="grammar-literal">&quot;</code>') : %{'"'}) + when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"}) + when (0x80..0xFFFD) then (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode c}</code>") : %{"#{c}"}) else escape_ebnf_hex(c) end end # Format a range @@ -306,11 +309,11 @@ buffer = lbrac s = StringScanner.new(string) while !s.eos? case when s.scan(/\A[!"\u0024-\u007e]+/) - buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched) + buffer << (@options[:html] ? %(<code class="grammar-literal">#{@coder.encode s.matched}</code>) : s.matched) when s.scan(/\A#x\h+/) buffer << escape_ebnf_hex(s.matched[2..-1].hex.chr(Encoding::UTF_8)) else buffer << escape_ebnf_hex(s.getch) end @@ -326,11 +329,12 @@ raise RangeError, "cannot format #{string.inspect} as an EBNF String: #{c.inspect} is out of range" unless ISOEBNF::TERMINAL_CHARACTER.match?(c) end end - "#{quote}#{string}#{quote}" + res = "#{quote}#{string}#{quote}" + @options[:html] ? @coder.encode(res) : res end def escape_ebnf_hex(u) fmt = case u.ord when 0x00..0x20 then "#x%02X" @@ -339,15 +343,15 @@ else "#x%08X" end char = fmt % u.ord if @options[:html] if u.ord <= 0x20 - char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{char}</abbr>) + char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>) elsif u.ord < 0x7F - char = %(<abbr title="ascii '#{u}'">#{char}</abbr>) + char = %(<abbr title="ascii '#{@coder.encode u}'">#{@coder.encode char}</abbr>) elsif u.ord == 0x7F - char = %(<abbr title="delete">#{char}</abbr>) + char = %(<abbr title="delete">#{@coder.encode char}</abbr>) elsif u.ord <= 0xFF char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>) else char = %(<abbr title="unicode '#{u}'">#{char}</abbr>) end @@ -361,11 +365,11 @@ # ABNF Formatters ## # Format the expression part of a rule def format_abnf(expr, sep: nil, embedded: false, sensitive: true) - return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol) + return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol) if expr.is_a?(String) if expr.length == 1 return format_abnf_char(expr) elsif expr.start_with?('%') # Already encoded @@ -378,11 +382,11 @@ seq = segments.inject([]) {|memo, s| memo.concat([[:hex, "#x22"], s])}[1..-1] seq.unshift(:seq) return format_abnf(seq, sep: nil, embedded: false) else - return (@options[:html] ? %("<code class="grammar-literal">#{'%s' if sensitive}#{expr}</code>") : %(#{'%s' if sensitive}"#{expr}")) + return (@options[:html] ? %("<code class="grammar-literal">#{'%s' if sensitive}#{@coder.encode expr}</code>") : %(#{'%s' if sensitive}"#{expr}")) end end parts = { alt: (@options[:html] ? "<code>/</code> " : "/ "), star: (@options[:html] ? "<code>*</code> " : "*"), @@ -526,15 +530,15 @@ else "%08X" end char = "%x" + (fmt % u.ord) if @options[:html] if u.ord <= 0x20 - char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{char}</abbr>) + char = %(<abbr title="#{ASCII_ESCAPE_NAMES[u.ord]}">#{@coder.encode char}</abbr>) elsif u.ord <= 0x7F - char = %(<abbr title="ascii '#{u}'">#{char}</abbr>) + char = %(<abbr title="ascii '#{u}'">#{@coder.encode char}</abbr>) elsif u.ord == 0x7F - char = %(<abbr title="delete">#{char}</abbr>) + char = %(<abbr title="delete">#{@coder.encode char}</abbr>) elsif u.ord <= 0xFF char = %(<abbr title="extended ascii '#{u}'">#{char}</abbr>) else char = %(<abbr title="unicode '#{u}'">#{char}</abbr>) end @@ -548,20 +552,20 @@ # ISO EBNF Formatters ## # Format the expression part of a rule def format_isoebnf(expr, sep: nil, embedded: false) - return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol) + return (@options[:html] ? %(<a href="#grammar-production-#{@coder.encode expr}">#{@coder.encode expr}</a>) : expr.to_s) if expr.is_a?(Symbol) if expr.is_a?(String) expr = expr[2..-1].hex.chr if expr =~ /\A#x\h+/ expr.chars.each do |c| raise RangeError, "cannot format #{expr.inspect} as an ISO EBNF String: #{c.inspect} is out of range" unless ISOEBNF::TERMINAL_CHARACTER.match?(c) end if expr =~ /"/ - return (@options[:html] ? %('<code class="grammar-literal">#{expr}</code>') : %('#{expr}')) + return (@options[:html] ? %('<code class="grammar-literal">#{@coder.encode expr}</code>') : %('#{expr}')) else - return (@options[:html] ? %("<code class="grammar-literal">#{expr}</code>") : %("#{expr}")) + return (@options[:html] ? %("<code class="grammar-literal">#{@coder.encode expr}</code>") : %("#{expr}")) end end parts = { alt: (@options[:html] ? "<code>|</code> " : "| "), diff: (@options[:html] ? "<code>-</code> " : "- "),