lib/ebnf/writer.rb in ebnf-0.3.5 vs lib/ebnf/writer.rb in ebnf-0.3.6
- old
+ new
@@ -1,7 +1,8 @@
# -*- encoding: utf-8 -*-
require 'rdf'
+require 'strscan' unless defined?(StringScanner)
##
# Serialize ruleset back to EBNF
module EBNF
class Writer
@@ -37,29 +38,55 @@
def self.write(out, *rules)
Writer.new(rules, out: out)
end
##
+ # Write formatted rules to an IO like object as HTML
+ #
+ # @param [Array<Rule>] rules
+ # @return [Object]
+ def self.html(*rules)
+ require 'stringio' unless defined?(StringIO)
+ buf = StringIO.new
+ Writer.new(rules, out: buf, html: true)
+ buf.string
+ end
+
+ ##
# @param [Array<Rule>] rules
# @param [Hash{Symbol => Object}] options
# @option options [Symbol] :format
# @option options [#write] :out ($stdout)
+ # @option options [Boolean] :html (false)
+ # Format as HTML
def initialize(rules, options = {})
+ @options = options.dup
out = options.fetch(:out, $stdio)
#fmt = options.fetch(:format, :ebnf)
# Determine max LHS length
max_id = rules.max_by {|r| r.id.to_s.length}.id.to_s.length
max_sym = rules.max_by {|r| r.sym.to_s.length}.sym.to_s.length
lhs_length = max_sym + 3
- lhs_fmt = "%-#{max_sym}{sym} ::= "
+ lhs_fmt = "%<sym>-#{max_sym}s ::= "
if max_id > 0
- lhs_fmt = "%-#{max_id+2}{id} " + lhs_fmt
+ lhs_fmt = "%<id>-#{max_id+2}s " + lhs_fmt
lhs_length += max_id + 3
end
rhs_length = LINE_LENGTH - lhs_length
+ if @options[:html]
+ # Output as formatted HTML
+ require 'haml'
+ html = Haml::Engine.new(HAML_DESC).render(self, rules: rules) do |rule|
+ formatted_expr = format(rule.expr)
+ formatted_expr.length > rhs_length ? format(rule.expr, "\n") : formatted_expr
+ end
+ out.write html
+ return
+ end
+
# Format each rule, considering the available rhs size
rules.each do |rule|
buffer = if rule.pass?
"%-#{lhs_length-2}s" % "@pass"
else
@@ -76,50 +103,120 @@
end
protected
# Format the expression part of a rule
def format(expr, sep = nil)
- return expr.to_s if expr.is_a?(Symbol)
- return %("#{escape(expr)}") if expr.is_a?(String)
+ return (@options[:html] ? %(<a href="#grammar-production-#{expr}">#{expr}</a>) : expr.to_s) if expr.is_a?(Symbol)
+ if expr.is_a?(String)
+ if expr.length == 1
+ return format_char(expr)
+ elsif expr =~ /\A#x\h+/
+ return (@options[:html] ? %(<code class="grammar-char-escape">#{expr}</code>) : expr)
+ elsif expr =~ /"/
+ return (@options[:html] ? %('<code class="grammar-literal">#{escape(expr, "'")}</code>') : %('#{escape(expr, "'")}'))
+ else
+ return (@options[:html] ? %("<code class="grammar-literal">#{escape(expr, '"')}</code>") : %("#{escape(expr, '"')}"))
+ end
+ end
+ parts = {
+ alt: (@options[:html] ? "<code>|</code> " : "| "),
+ diff: (@options[:html] ? "<code>-</code> " : "- "),
+ star: (@options[:html] ? "<code>*</code> " : "*"),
+ plus: (@options[:html] ? "<code>+</code> " : "+"),
+ opt: (@options[:html] ? "<code>?</code> " : "?")
+ }
+ lparen = (@options[:html] ? "<code>(</code> " : "(")
+ rparen = (@options[:html] ? "<code>)</code> " : ")")
case expr.first
when :alt, :diff
- this_sep = (sep ? sep : " ") + {alt: "| ", diff: "- "}[expr.first.to_sym]
+ this_sep = (sep ? sep : " ") + parts[expr.first.to_sym]
expr[1..-1].map {|e| format(e)}.join(this_sep)
when :star, :plus, :opt
raise "Expected star expression to have a single operand" unless expr.length == 2
- char = {star: "*", plus: "+", opt: "?"}[expr.first.to_sym]
+ char = parts[expr.first.to_sym]
r = format(expr[1])
(r.start_with?("(") || Array(expr[1]).length == 1) ? "#{r}#{char}" : "(#{r})#{char}"
+ when :hex
+ (@options[:html] ? %(<code class="grammar-char-escape">#{expr.last}</code>) : expr.last)
when :range
- parts = expr.last.split(/(?!\\)-/, 2)
- "[" + parts.map {|e| format(e)[1..-2]}.join("-") + "]"
+ format_range(expr.last)
when :seq
this_sep = (sep ? sep : " ")
- expr[1..-1].map {|e| r = format(e); Array(e).length > 2 ? "(#{r})" : r}.join(this_sep)
+ expr[1..-1].map {|e| r = format(e); Array(e).length > 2 ? "#{lparen}#{r}#{rparen}" : r}.join(this_sep)
else
raise "Unknown operator: #{expr.first}"
end
end
- def escape(string)
+ # Format a single-character string, prefering hex for non-main ASCII
+ def format_char(c)
+ case c.ord
+ when 0x22 then (@options[:html] ? %('<code class="grammar-literal">"</code>') : %{'"'})
+ when (0x23..0x7e) then (@options[:html] ? %("<code class="grammar-literal">#{c}</code>") : %{"#{c}"})
+ else (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(c)}</code>) : escape_hex(c))
+ end
+ end
+
+ # Format a range
+ def format_range(string)
+ lbrac = (@options[:html] ? "<code>[</code> " : "[")
+ rbrac = (@options[:html] ? "<code>]</code> " : "]")
+ dash = (@options[:html] ? "<code>-</code> " : "-")
+
+ buffer = lbrac
+ s = StringScanner.new(string)
+ while !s.eos?
+ case
+ when s.scan(/\A[!"\u0024-\u007e]+/)
+ buffer << (@options[:html] ? %(<code class="grammar-literal">#{s.matched}</code>) : s.matched)
+ when s.scan(/\A#x\h+/)
+ buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{s.matched}</code>) : s.matched)
+ when s.scan(/\A-/)
+ buffer << dash
+ else
+ buffer << (@options[:html] ? %(<code class="grammar-char-escape">#{escape_hex(s.getch)}</code>) : escape_hex(s.getch))
+ end
+ end
+ buffer + rbrac
+ end
+
+ # Escape a string, using as many UTF-8 characters as possible
+ def escape(string, quote = '"')
buffer = ""
string.each_char do |c|
- buffer << case c.to_s
- when "\t" then "\\t"
- when "\n" then "\\n"
- when "\r" then "\\r"
- when "\\" then "\\\\"
- #when "(" then "\\("
- #when ")" then "\\)"
- #when "[" then "\\["
- #when "]" then "\\]"
- #when "-" then "\\\\-"
- when "'" then "\\'"
- when '"' then "\\\""
- else c
+ buffer << case (u = c.ord)
+ when (0x00..0x1f) then "#x%02X" % u
+ when quote.ord then "#x%02X" % u
+ else c
end
end
buffer
end
+
+ def escape_hex(u)
+ fmt = case u.ord
+ when 0x0000..0x00ff then "#x%02X"
+ when 0x0100..0xffff then "#x%04X"
+ else "#x%08X"
+ end
+ sprintf(fmt, u.ord)
+ end
+
+ HAML_DESC = %q(
+ %table.grammar
+ %tbody#grammar-productions
+ - rules.each do |rule|
+ %tr{id: "grammar-production-#{rule.sym}"}
+ - if rule.pass?
+ %td{colspan: 3}
+ %code<="@pass"
+ - else
+ %td<= "[#{rule.id}]"
+ %td<
+ %code<= rule.sym
+ %td<= "::="
+ %td
+ != yield rule
+ ).gsub(/^ /, '')
end
end