lib/ebnf/base.rb in ebnf-2.0.0 vs lib/ebnf/base.rb in ebnf-2.1.0

- old
+ new

@@ -64,26 +64,10 @@ # [Yacker]: https://rubygems/02/26-modules/User/Yacker # [SPARQL specification]: https://www.w3.org/TR/rdf-sparql-query/ # [Cwm Release 1.1.0rc1]: https://lists.w3.org/Archives/Public/public-cwm-announce/2005JulSep/0000.html # [bnf-rules.n3]: https://www.w3.org/2000/10/swap/grammar/bnf-rules.n3 # -# Open Issues and Future Work -# --------------------------- -# -# The yacker output also has the terminals compiled to elaborate regular -# expressions. The best strategy for dealing with lexical tokens is not -# yet clear. Many tokens in SPARQL are case insensitive; this is not yet -# captured formally. -# -# The schema for the EBNF vocabulary used here (``g:seq``, ``g:alt``, ...) -# is not yet published; it should be aligned with [swap/grammar/bnf][] -# and the [bnf2html.n3][] rules (and/or the style of linked XHTML grammar -# in the SPARQL and XML specificiations). -# -# It would be interesting to corroborate the claim in the SPARQL spec -# that the grammar is LL(1) with a mechanical proof based on N3 rules. -# # [swap/grammar/bnf]: https://www.w3.org/2000/10/swap/grammar/bnf # [bnf2html.n3]: https://www.w3.org/2000/10/swap/grammar/bnf2html.n3 # # Background # ---------- @@ -98,11 +82,11 @@ # @author Gregg Kellogg module EBNF class Base include BNF include LL1 - include Parser + include Native include PEG # Abstract syntax tree from parse # # @return [Array<Rule>] @@ -116,53 +100,99 @@ # Parse the string or file input generating an abstract syntax tree # in S-Expressions (similar to SPARQL SSE) # # @param [#read, #to_s] input # @param [Symbol] format (:ebnf) - # Format of input, one of :ebnf, or :sxp + # Format of input, one of `:abnf`, `:ebnf`, `:isoebnf`, `:isoebnf`, `:native`, or `:sxp`. + # Use `:native` for the native EBNF parser, rather than the PEG parser. # @param [Hash{Symbol => Object}] options # @option options [Boolean, Array] :debug # Output debug information to an array or $stdout. + # @option options [Boolean, Array] :validate + # Validate resulting grammar. def initialize(input, format: :ebnf, **options) @options = options.dup @lineno, @depth, @errors = 1, 0, [] - terminal = false @ast = [] input = input.respond_to?(:read) ? input.read : input.to_s case format - when :sxp - require 'sxp' unless defined?(SXP) - @ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)} + when :abnf + abnf = ABNF.new(input, **options) + @ast = abnf.ast when :ebnf + ebnf = Parser.new(input, **options) + @ast = ebnf.ast + when :isoebnf + iso = ISOEBNF.new(input, **options) + @ast = iso.ast + when :native + terminals = false scanner = StringScanner.new(input) eachRule(scanner) do |r| debug("rule string") {r.inspect} case r when /^@terminals/ # Switch mode to parsing terminals - terminal = true + terminals = true + rule = Rule.new(nil, nil, nil, kind: :terminals, ebnf: self) + @ast << rule when /^@pass\s*(.*)$/m expr = expression($1).first rule = Rule.new(nil, nil, expr, kind: :pass, ebnf: self) rule.orig = expr @ast << rule else rule = depth {ruleParts(r)} - rule.kind = :terminal if terminal # Override after we've parsed @terminals + rule.kind = :terminal if terminals # Override after we've parsed @terminals rule.orig = r @ast << rule end end + when :sxp + require 'sxp' unless defined?(SXP) + @ast = SXP::Reader::Basic.read(input).map {|e| Rule.from_sxp(e)} else raise "unknown input format #{format.inspect}" end + + validate! if @options[:validate] end + ## + # Validate the grammar. + # + # Makes sure that rules reference either strings or other defined rules. + # + # @raise [RangeError] + def validate! + ast.each do |rule| + begin + rule.validate!(@ast) + rescue SyntaxError => e + error("In rule #{rule.sym}: #{e.message}") + end + end + raise SyntaxError, errors.join("\n") unless errors.empty? + end + + ## + # Is the grammar valid? + # + # Uses `#validate!` and catches `RangeError` + # + # @return [Boolean] + def valid? + validate! + true + rescue SyntaxError + false + end + # Iterate over each rule or terminal, except empty # @param [:termina, :rule] kind # @yield rule # @yieldparam [Rule] rule def each(kind, &block) @@ -172,25 +202,29 @@ ## # Write out parsed syntax string as an S-Expression # @return [String] def to_sxp require 'sxp' unless defined?(SXP) - SXP::Generator.string(ast.sort_by{|r| r.id.to_f}.map(&:for_sxp)) + SXP::Generator.string(ast.map(&:for_sxp)) end ## # Output formatted EBNF + # + # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) # @return [String] - def to_s - Writer.string(*ast) + def to_s(format: :ebnf) + Writer.string(*ast, format: format) end ## # Output formatted EBNF as HTML + # + # @param [:abnf, :ebnf, :isoebnf] format (:ebnf) # @return [String] - def to_html - Writer.html(*ast) + def to_html(format: :ebnf) + Writer.html(*ast, format: format) end ## # Output Ruby parser files # @@ -208,42 +242,37 @@ output.puts "module #{mod_name}" output.puts " START = #{self.start.inspect}\n" if self.start end # Either output LL(1) BRANCH tables or rules for PEG parsing - if ast.first.is_a?(EBNF::PEG::Rule) - to_ruby_peg(output) - else + if ast.first.first to_ruby_ll1(output) + else + to_ruby_peg(output) end unless output == $stdout output.puts "end" end end - def dup - new_obj = super - new_obj.instance_variable_set(:@ast, @ast.dup) - new_obj - end - ## - # Find a rule given a symbol - # @param [Symbol] sym - # @return [Rule] - def find_rule(sym) - (@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym} + # Renumber, rule identifiers + def renumber! + ast.each_with_index do |rule, index| + rule.id = (index + 1).to_s + end end ## # Write out syntax tree as Turtle # @param [String] prefix for language # @param [String] ns URI for language # @return [String] def to_ttl(prefix = nil, ns = "http://example.org/") unless ast.empty? [ + "@prefix dc: <http://purl.org/dc/terms/>.", "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.", "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.", ("@prefix #{prefix}: <#{ns}>." if prefix), "@prefix : <#{ns}>.", "@prefix re: <http://www.w3.org/2000/10/swap/grammar/regex#>.", @@ -252,10 +281,24 @@ ":language rdfs:isDefinedBy <>; g:start :#{ast.first.id}.", "", ].compact end.join("\n") + - ast.sort.map(&:to_ttl).join("\n") + ast.map(&:to_ttl).join("\n") + end + + def dup + new_obj = super + new_obj.instance_variable_set(:@ast, @ast.dup) + new_obj + end + + ## + # Find a rule given a symbol + # @param [Symbol] sym + # @return [Rule] + def find_rule(sym) + (@find ||= {})[sym] ||= ast.detect {|r| r.sym == sym} end def depth @depth += 1 ret = yield \ No newline at end of file