lib/sgf/parser.rb in SgfParser-1.0.0 vs lib/sgf/parser.rb in SgfParser-2.0.0

- old
+ new

require 'stringio'

module SGF

  # Parses SGF text into an SGF::Tree.
  #
  #   parser = SGF::Parser.new
  #   tree = parser.parse sgf_in_string_form
  #
  # The parser walks the input one byte at a time: "(" opens a branch,
  # ")" closes it, and ";" starts a new node whose properties
  # (IDENTITY[value]...) are collected into a Hash and handed to the node.
  class Parser

    NEW_NODE = ";".freeze
    BRANCHING = ["(", ")"].freeze
    PROPERTY = ["[", "]"].freeze
    NODE_DELIMITERS = [NEW_NODE].concat(BRANCHING).freeze
    # Identities whose value is parsed as a list ("AB[aa][bb]" => ["aa", "bb"]).
    LIST_IDENTITIES = ["AW", "AB", "AE", "AR", "CR", "DD",
                       "LB", "LN", "MA", "SL", "SQ", "TR", "VW",
                       "TB", "TW"].freeze
    # Literal character sequences stripped from the input before parsing,
    # removed in this order. Each one is two backslashes followed by a letter.
    ESCAPED_LINE_BREAKS = ["\\\\n\\\\r", "\\\\r\\\\n", "\\\\r", "\\\\n"].freeze

    # Parses +sgf+ and returns an SGF::Tree.
    #
    # +sgf+ may be a path to an existing file (String), SGF text (String),
    # or any object responding to +read+ (File, StringIO, ...).
    # The caller's string is never modified.
    #
    # +strict_parsing+ (default true) makes the parser raise
    # SGF::MalformedDataError when the input does not start with "(;"
    # (ignoring whitespace). Pass false to skip that check.
    def parse sgf, strict_parsing = true
      @strict_parsing = strict_parsing
      @stream = streamably_stringify sgf
      @tree = Tree.new
      @root = @tree.root
      @current_node = @root
      @branches = []
      until @stream.eof?
        case next_character
        when "(" then open_branch
        when ";"
          create_new_node
          parse_node_data
          add_properties_to_current_node
        when ")" then close_branch
        end # any other character between nodes is ignored
      end
      @tree
    end

    private

    # Turns the accepted input kinds into a read-only StringIO over the
    # cleaned SGF text, running the strict-mode sanity check first.
    def streamably_stringify sgf
      sgf = sgf.read if sgf.respond_to?(:read)
      sgf = File.read(sgf) if File.exist?(sgf)

      check_for_errors_before_parsing sgf if @strict_parsing
      StringIO.new clean(sgf), 'r'
    end

    # Raises SGF::MalformedDataError unless +string+ starts with "(;",
    # with optional whitespace before and between the two characters.
    def check_for_errors_before_parsing string
      return if string[/\A\s*\(\s*;/]

      # Report the characters the message actually talks about: the first
      # two non-whitespace ones, not the first two of the raw string.
      found = string.match(/\A\s*(\S?)\s*(\S?)/)
      msg = "The first two non-whitespace characters of the string should be (;" \
            " but they were #{found[1]}#{found[2]} instead."
      raise SGF::MalformedDataError, msg
    end

    # Returns a copy of +sgf+ with every ESCAPED_LINE_BREAKS sequence
    # removed. Uses non-mutating gsub so the caller's string is left alone
    # and frozen strings are accepted.
    def clean sgf
      ESCAPED_LINE_BREAKS.reduce(sgf) { |text, sequence| text.gsub(sequence, '') }
    end

    # Remembers the node a variation starts from.
    def open_branch
      @branches.unshift @current_node
    end

    # Returns to the node the current variation started from. An unmatched
    # ")" has nothing to return to, so fall back to the root instead of
    # leaving @current_node nil.
    def close_branch
      @current_node = @branches.shift || @root
    end

    # Appends a fresh node under the current one and makes it current.
    def create_new_node
      node = Node.new
      @current_node.add_children node
      @current_node = node
    end

    # Reads every IDENTITY[value] pair of the current node into
    # @node_properties. A repeated identity overwrites the earlier value.
    def parse_node_data
      @node_properties = {}
      while still_inside_node?
        parse_identity
        parse_property
        @node_properties[@identity] = @property
      end
    end

    def add_properties_to_current_node
      @current_node.add_properties @node_properties
    end

    # Peeks at the next non-whitespace character (without consuming it) and
    # reports whether it belongs to the current node, i.e. is not one of
    # NODE_DELIMITERS. Returns false at end of input.
    def still_inside_node?
      char = next_non_whitespace_character
      return false unless char

      @stream.pos -= 1 # un-read the character we peeked at
      !NODE_DELIMITERS.include?(char)
    end

    # Reads the property identity up to (and consuming) the opening "[".
    # Whitespace inside or after the identity is dropped so that line
    # breaks or spaces never end up in the property key.
    def parse_identity
      @identity = "".dup
      while (char = next_character) && char != "["
        @identity << char unless char[/\s/]
      end
    end

    # Reads the value of the property named by @identity into @property,
    # choosing the reader by identity (case-insensitively).
    def parse_property
      @property = "".dup
      case @identity.upcase
      when "C" then parse_comment
      when *LIST_IDENTITIES then parse_multi_property
      else parse_generic_property
      end
    end

    # Reads a comment up to its closing "]", treating "\]" as an escaped
    # bracket that is part of the text, then unescapes it.
    def parse_comment
      while (char = next_character) && still_inside_comment?(char)
        @property << char
      end
      @property.gsub! "\\]", "]"
    end

    # Reads a list value such as "aa][bb" and stores it as ["aa", "bb"].
    # Splitting on "][" (rather than on commas) keeps values that contain
    # commas, e.g. label text, in one piece.
    def parse_multi_property
      while (char = next_character) && still_inside_multi_property?(char)
        @property << char
      end
      @property = @property.split("][")
    end

    # Reads a plain value up to the first "]".
    def parse_generic_property
      while (char = next_character) && char != "]"
        @property << char
      end
    end

    # A "]" only ends the comment when it is not preceded by a backslash.
    def still_inside_comment? char
      char != "]" || @property.end_with?("\\")
    end

    # A "]" only ends a list value when the next non-whitespace character
    # is not another "[". The peeked character is left in the stream;
    # whitespace skipped while peeking is consumed.
    def still_inside_multi_property? char
      return true if char != "]"

      following = next_non_whitespace_character
      return false unless following

      @stream.pos -= 1 # un-read the character we peeked at
      following == "["
    end

    # Consumes whitespace and returns the first non-whitespace character,
    # or nil when the input ends first.
    def next_non_whitespace_character
      while (char = next_character)
        return char unless char[/\s/]
      end
      nil
    end

    # Returns the next byte of the stream as a String, or false at the end
    # of the input.
    def next_character
      !@stream.eof? && @stream.sysread(1)
    end
  end

end