# -*- encoding: utf-8 -*-
require 'rdf'
require 'strscan' unless defined?(StringScanner)

##
# Serialize ruleset back to EBNF
module EBNF
  class Writer
    # Line width used to decide when a rule's right-hand side must be wrapped.
    LINE_LENGTH = 80

    ##
    # Format rules to a String
    #
    # @param [Array] rules objects responding to `id`, `sym`, `expr` and `pass?`
    # @return [String] the formatted rules, one per line
    def self.string(*rules)
      require 'stringio' unless defined?(StringIO)
      buf = StringIO.new
      write(buf, *rules)
      buf.string
    end

    ##
    # Format rules to $stdout
    #
    # @param [Array] rules
    # @return [Writer] the writer that produced the output
    def self.print(*rules)
      write($stdout, *rules)
    end

    ##
    # Write formatted rules to an IO like object
    #
    # @param [#puts, #write] out
    # @param [Array] rules
    # @return [Writer] the writer that produced the output
    def self.write(out, *rules)
      Writer.new(rules, out: out)
    end

    ##
    # Format rules as HTML and return the result as a String
    #
    # @param [Array] rules
    # @return [String]
    def self.html(*rules)
      require 'stringio' unless defined?(StringIO)
      buf = StringIO.new
      Writer.new(rules, out: buf, html: true)
      buf.string
    end

    ##
    # Formats `rules` and writes the result to `out` as a side effect of
    # construction.
    #
    # @param [Array] rules objects responding to `id`, `sym`, `expr` and `pass?`
    # @param [#puts, #write] out ($stdout) destination for the formatted output
    # @param [Boolean] html (false) render through Haml instead of plain text
    # @param [Hash{Symbol => Object}] options additional options, kept in `@options`
    def initialize(rules, out: $stdout, html: false, **options)
      # `html` is captured by its own keyword, so it has to be merged back in;
      # otherwise the `@options[:html]` checks in the formatters never fire.
      @options = options.merge(html: html)

      # Determine max LHS length (0 when there are no rules at all)
      max_id = rules.map { |r| r.id.to_s.length }.max || 0
      max_sym = rules.map { |r| r.sym.to_s.length }.max || 0
      lhs_length = max_sym + 3
      # Named references are required because the format is applied to a Hash.
      lhs_fmt = "%<sym>-#{max_sym}s ::= "
      if max_id > 0
        lhs_fmt = "%<id>-#{max_id + 2}s " + lhs_fmt
        lhs_length += max_id + 3
      end
      rhs_length = LINE_LENGTH - lhs_length

      if html
        # Output as formatted HTML
        begin
          require 'haml'
          hout = Haml::Engine.new(HAML_DESC).render(self, rules: rules) do |rule|
            formatted_expr = format(rule.expr)
            formatted_expr.length > rhs_length ? format(rule.expr, "\n") : formatted_expr
          end
          out.write hout
          return
        rescue LoadError
          $stderr.puts "Generating HTML requires haml gem to be loaded"
          # Fall through to the plain-text writer below, so use plain formatting.
          @options[:html] = false
        end
      end

      # Format each rule, considering the available rhs size
      continuation = "\n" + " " * lhs_length
      rules.each do |rule|
        buffer = if rule.pass?
          "%-#{lhs_length - 2}s" % "@pass"
        else
          lhs_fmt % {id: "[#{rule.id}]", sym: rule.sym}
        end
        formatted_expr = format(rule.expr)
        # Re-format with a line-breaking separator when the one-line form is too wide
        formatted_expr = format(rule.expr, continuation) if formatted_expr.length > rhs_length
        out.puts(buffer << formatted_expr)
      end
    end

    protected

    ##
    # Format the expression part of a rule
    #
    # @param [Symbol, String, Array] expr a rule reference, a terminal string, or
    #   an operator array whose first element is one of
    #   `:alt, :diff, :star, :plus, :opt, :hex, :range, :seq`
    # @param [String, nil] sep separator placed between the operands of a
    #   top-level `:alt`, `:diff` or `:seq`; a single space when nil
    # @return [String]
    # @raise [RuntimeError] for an unknown operator, or when `:star`, `:plus` or
    #   `:opt` does not have exactly one operand
    def format(expr, sep = nil)
      return expr.to_s if expr.is_a?(Symbol)

      if expr.is_a?(String)
        return format_char(expr) if expr.length == 1
        return expr if expr.match?(/\A#x\h+/)
        # Quote with whichever quote character the string does not contain
        quote = expr.include?('"') ? "'" : '"'
        return "#{quote}#{escape(expr, quote)}#{quote}"
      end

      html = @options[:html]
      parts = {
        alt: "| ",
        diff: "- ",
        star: (html ? "* " : "*"),
        plus: (html ? "+ " : "+"),
        opt: (html ? "? " : "?")
      }
      lparen = (html ? "( " : "(")
      rparen = (html ? ") " : ")")

      operator = expr.first
      case operator
      when :alt, :diff
        this_sep = (sep || " ") + parts[operator.to_sym]
        expr[1..-1].map { |e| format(e) }.join(this_sep)
      when :star, :plus, :opt
        raise "Expected star expression to have a single operand" unless expr.length == 2
        char = parts[operator.to_sym]
        r = format(expr[1])
        # Only compound operands that are not already parenthesized need wrapping
        (r.start_with?("(") || Array(expr[1]).length == 1) ? "#{r}#{char}" : "(#{r})#{char}"
      when :hex
        expr.last
      when :range
        format_range(expr.last)
      when :seq
        members = expr[1..-1].map do |e|
          r = format(e)
          # Operands with more than one argument are grouped explicitly
          Array(e).length > 2 ? "#{lparen}#{r}#{rparen}" : r
        end
        members.join(sep || " ")
      else
        raise "Unknown operator: #{expr.first}"
      end
    end

    ##
    # Format a single-character string, preferring hex for characters outside
    # the range 0x22..0x7e
    #
    # @param [String] c
    # @return [String]
    def format_char(c)
      case c.ord
      when 0x22 then %{'"'}
      when (0x23..0x7e) then %{"#{c}"}
      else escape_hex(c)
      end
    end

    ##
    # Format a range
    #
    # @param [String] string the contents of the range, without brackets
    # @return [String] the bracketed range
    def format_range(string)
      html = @options[:html]
      buffer = (html ? "[ " : "[").dup
      s = StringScanner.new(string)
      until s.eos?
        if s.scan(/\A[!"\u0024-\u007e]+/)
          buffer << s.matched
        elsif s.scan(/\A#x\h+/)
          buffer << s.matched
        elsif s.scan(/\A-/)
          # NOTE: "-" (0x2d) is already matched by the first character class,
          # so this branch is not expected to be reached.
          buffer << (html ? "- " : "-")
        else
          buffer << escape_hex(s.getch)
        end
      end
      buffer + (html ? "] " : "]")
    end

    ##
    # Escape a string, using as many UTF-8 characters as possible. Control
    # characters (0x00..0x1f) and the quote character itself are replaced by
    # their `#xNN` form.
    #
    # @param [String] string
    # @param [String] quote the quote character that will surround the result
    # @return [String]
    def escape(string, quote = '"')
      quote_ord = quote.ord
      string.each_char.map do |c|
        u = c.ord
        (u <= 0x1f || u == quote_ord) ? "#x%02X" % u : c
      end.join
    end

    ##
    # Render a character as an EBNF hex escape, using 2, 4 or 8 hex digits
    # depending on the size of the code point.
    #
    # @param [#ord] u a character, or its code point
    # @return [String]
    def escape_hex(u)
      code = u.ord
      fmt = case code
      when 0x0000..0x00ff then "#x%02X"
      when 0x0100..0xffff then "#x%04X"
      else "#x%08X"
      end
      sprintf(fmt, code)
    end

    # Haml template for the HTML form: one table row per rule. The formatted
    # right-hand side of each rule is supplied by the block given to `render`.
    HAML_DESC = %q(
      %table.grammar
        %tbody#grammar-productions
          - rules.each do |rule|
            %tr{id: "grammar-production-#{rule.sym}"}
              - if rule.pass?
                %td{colspan: 3}
                  %code<="@pass"
              - else
                %td<= "[#{rule.id}]"
                %td<
                  %code<= rule.sym
                %td<= "::="
              %td
                != yield rule
    ).gsub(/^      /, '')
  end
end