lib/fbo/parser.rb in fbo-0.0.3 vs lib/fbo/parser.rb in fbo-0.1.0
- old
+ new
@@ -1,66 +1,63 @@
-require "strscan"
+require 'treetop'
+require 'fbo/node_extensions'
+base_path = ::File.expand_path(::File.dirname(__FILE__))
+Treetop.load(::File.join(base_path, './dump.treetop'))
+
module FBO
class Parser
- TAG_PATTERN = /<\w+>/m
- PATTERN = /<([A-Z]+)>.*<\/\1>/m
-
- def initialize(file, tag_pattern: TAG_PATTERN)
+ def initialize(file) # keeps the file object; #parse reads @file.contents when called without data
@file = file
- @tag_pattern = tag_pattern
end
- def notices
- if @notices.nil?
- parse_notices(split_file_contents)
+ # Parses the given data into a tree, stores it in @tree and returns it.
+ #
+ # data - optional input. When nil (or false), @file.contents is used.
+ #        Input that responds to #each is handed to parse_collection;
+ #        anything else is handed to parse_string.
+ #
+ # Returns whatever parse_collection / parse_string returned.
+ def parse(data = nil)
+ data ||= @file.contents
+
+ if data.respond_to? :each
+ @tree = parse_collection(data)
+ else
+ @tree = parse_string(data)
end
- @notices
+ @tree
end
private
- def split_file_contents
- contents = @file.read
- contents = cleanup_contents(contents)
- scanner = StringScanner.new(contents)
- text_notices = []
+ # Lazily builds the FBO::DumpParser and reuses the same instance afterwards.
+ def parser
+ return @parser if @parser
+ @parser = FBO::DumpParser.new
+ end
- while !scanner.eos?
- initial_tag = scanner.check_until(@tag_pattern)
- if initial_tag
- initial_tag.strip!
- else
- break
- end
+ # Parses one string with the parser returned by #parser.
+ #
+ # data - the text handed to parser.parse.
+ #
+ # Returns the result of clean_tree applied to the parse tree.
+ # Raises RuntimeError carrying the parser's failure line, column and reason
+ # when parser.parse returns nil.
+ def parse_string(data)
+ tree = parser.parse(data)
+ if tree.nil?
+ line = parser.failure_line
+ column = parser.failure_column
+ reason = parser.failure_reason
+ # Raise a StandardError descendant (RuntimeError) rather than Exception so a
+ # plain `rescue` can handle bad input; `rescue Exception` still catches it.
+ raise "Parse error at line #{ line }, column #{ column }: #{ reason }"
+ end
+ clean_tree(tree)
+ end
- text_notice = scanner.scan_until(/#{ closing_tag_for(initial_tag) }\s*/)
- if text_notice
- text_notices << text_notice.strip
+ def parse_collection(data) # parses every string yielded by data.each and merges the resulting trees
+ super_tree = nil # accumulated result; stays nil (and is returned as nil) if data yields nothing
+ data.each do |string|
+ tree = parse_string(string)
+ if super_tree
+ super_tree = FBO::Dump::DumpNode.new(super_tree.input, super_tree.interval, # new node reuses the accumulated input and interval
+ super_tree.elements + tree.elements) # and holds the accumulated elements followed by this tree's elements
else
- break
+ super_tree = tree # the first parsed tree seeds the result
end
end
- text_notices
+ super_tree
end
-
- def parse_notices(text_notices)
- @notices = []
- text_notices.each do |text|
- handler = FBO::Parser::HandlerSelector.select(text)
- @notices << handler.parse(text)
- end
- end
-
- def cleanup_contents(contents)
- contents
- .encode('UTF-16le', :invalid => :replace, :replace => '')
- .encode('UTF-8')
- .gsub(/\r\n/, "\n")
- .gsub(/^M/, "")
- end
-
- def closing_tag_for(tag)
- tag.sub(/</, "</")
+
+ # Recursively removes, in place, every child whose class is exactly
+ # Treetop::Runtime::SyntaxNode (children of any other class are kept).
+ #
+ # node - an object responding to #elements (nil or a mutable collection).
+ #
+ # Returns the node itself. A node whose elements are nil is handed back
+ # untouched instead of nil, so parse_string never returns nil for a
+ # successfully parsed tree.
+ def clean_tree(node)
+ return node if node.elements.nil?
+ # Compare by class name so only the bare base class is dropped.
+ node.elements.delete_if { |child| child.class.name == "Treetop::Runtime::SyntaxNode" }
+ node.elements.each { |child| clean_tree(child) }
+ node
end
end
end