lib/fbo/parser.rb in fbo-0.0.3 vs lib/fbo/parser.rb in fbo-0.1.0

- old
+ new

@@ -1,66 +1,63 @@
-require "strscan"
+require 'treetop'
+require 'fbo/node_extensions'
+base_path = ::File.expand_path(::File.dirname(__FILE__))
+Treetop.load(::File.join(base_path, './dump.treetop'))
+
 
 module FBO
   class Parser
-    TAG_PATTERN = /<\w+>/m
-    PATTERN = /<([A-Z]+)>.*<\/\1>/m
-
-    def initialize(file, tag_pattern: TAG_PATTERN)
+    def initialize(file)
       @file = file
-      @tag_pattern = tag_pattern
     end
 
-    def notices
-      if @notices.nil?
-        parse_notices(split_file_contents)
+    def parse(data = nil)
+      data ||= @file.contents
+
+      puts "Class = #{ data.class.name }"
+      if data.respond_to? :each
+        @tree = parse_collection(data)
+      else
+        @tree = parse_string(data)
       end
-      @notices
+      @tree
     end
 
     private
 
-    def split_file_contents
-      contents = @file.read
-      contents = cleanup_contents(contents)
-      scanner = StringScanner.new(contents)
-      text_notices = []
+    def parser
+      @parser ||= FBO::DumpParser.new
+    end
 
-      while !scanner.eos?
-        initial_tag = scanner.check_until(@tag_pattern)
-        if initial_tag
-          initial_tag.strip!
-        else
-          break
-        end
+    def parse_string(data)
+      tree = parser.parse(data)
+      if tree.nil?
+        line = parser.failure_line
+        column = parser.failure_column
+        reason = parser.failure_reason
+        raise Exception, "Parse error at line #{ line }, column #{ column }: #{ reason }"
+      end
+      clean_tree(tree)
+    end
 
-        text_notice = scanner.scan_until(/#{ closing_tag_for(initial_tag) }\s*/)
-        if text_notice
-          text_notices << text_notice.strip
+    def parse_collection(data)
+      super_tree = nil
+      data.each do |string|
+        tree = parse_string(string)
+        if super_tree
+          super_tree = FBO::Dump::DumpNode.new(super_tree.input, super_tree.interval,
+                                               super_tree.elements + tree.elements)
         else
-          break
+          super_tree = tree
         end
       end
-      text_notices
+      super_tree
     end
-
-    def parse_notices(text_notices)
-      @notices = []
-      text_notices.each do |text|
-        handler = FBO::Parser::HandlerSelector.select(text)
-        @notices << handler.parse(text)
-      end
-    end
-
-    def cleanup_contents(contents)
-      contents
-        .encode('UTF-16le', :invalid => :replace, :replace => '')
-        .encode('UTF-8')
-        .gsub(/\r\n/, "\n")
-        .gsub(/^M/, "")
-    end
-
-    def closing_tag_for(tag)
-      tag.sub(/</, "</")
+
+    def clean_tree(node)
+      return if node.elements.nil?
+      node.elements.delete_if { |node| node.class.name == "Treetop::Runtime::SyntaxNode" }
+      node.elements.each { |node| clean_tree(node) }
+      node
     end
   end
 end