lib/sgf/parser.rb in SgfParser-2.0.0 vs lib/sgf/parser.rb in SgfParser-3.0.0

- old
+ new

@@ -1,167 +1,67 @@ -require 'stringio' +require_relative 'collection_assembler' +require_relative 'parsing_tokens' +require_relative 'error_checkers' +require_relative 'stream' -module SGF +# The parser returns a SGF::Collection representation of the SGF file +# parser = SGF::Parser.new +# collection = parser.parse sgf_in_string_form +class SGF::Parser + NEW_NODE = ";" + BRANCHING = %w{( )} + END_OF_FILE = false + NODE_DELIMITERS = [NEW_NODE].concat(BRANCHING).concat([END_OF_FILE]) + PROPERTY = %w([ ]) + LIST_IDENTITIES = %w(AW AB AE AR CR DD LB LN MA SL SQ TR VW TB TW) - #The parser returns a SGF::Tree representation of the SGF file - #parser = SGF::Parser.new - #tree = parser.parse sgf_in_string_form - class Parser - - NEW_NODE = ";" - BRANCHING = ["(", ")"] - PROPERTY = ["[", "]"] - NODE_DELIMITERS = [NEW_NODE].concat BRANCHING - LIST_IDENTITIES = ["AW", "AB", "AE", "AR", "CR", "DD", - "LB", "LN", "MA", "SL", "SQ", "TR", "VW", - "TB", "TW"] - - # This takes as argument an SGF and returns an SGF::Tree object - # It accepts a local path (String), a stringified SGF (String), - # or a file handler (File). - # The second argument is optional, in case you don't want this to raise errors. - # You probably shouldn't use it, but who's gonna stop you? - def parse sgf, strict_parsing = true - @strict_parsing = strict_parsing - @stream = streamably_stringify sgf - @tree = Tree.new - @root = @tree.root - @current_node = @root - @branches = [] - until @stream.eof? - case next_character - when "(" then open_branch - when ";" then - create_new_node - parse_node_data - add_properties_to_current_node - when ")" then close_branch - else next - end + # This takes as argument an SGF and returns an SGF::Collection object + # It accepts a local path (String), a stringified SGF (String), + # or a file handler (File). + # The second argument is optional, in case you don't want this to raise errors. + # You probably shouldn't use it, but who's gonna stop you? + def parse sgf, strict_parsing = true + error_checker = strict_parsing ? SGF::StrictErrorChecker.new : SGF::LaxErrorChecker.new + @sgf_stream = SGF::Stream.new(sgf, error_checker) + @assembler = SGF::CollectionAssembler.new + until @sgf_stream.eof? + case @sgf_stream.next_character + when "(" then @assembler.open_branch + when ";" then + parse_node_data + @assembler.create_node_with_properties @node_properties + when ")" then @assembler.close_branch + else next end - @tree end + @assembler.collection + end - private + private - def streamably_stringify sgf - sgf = sgf.read if sgf.instance_of?(File) - sgf = File.read(sgf) if File.exist?(sgf) - - check_for_errors_before_parsing sgf if @strict_parsing - StringIO.new clean(sgf), 'r' - end - - def check_for_errors_before_parsing string - msg = "The first two non-whitespace characters of the string should be (;" - unless string[/\A\s*\(\s*;/] - msg << " but they were #{string[0..1]} instead." - raise(SGF::MalformedDataError, msg) + def parse_node_data + @node_properties = {} + while still_inside_node? + identity = @sgf_stream.read_token SGF::IdentityToken.new + property_format = property_token_type identity + property = @sgf_stream.read_token property_format + if @node_properties[identity] + @node_properties[identity].concat property + @assembler.add_error "Multiple #{identity} identities are present in a single node. A property should only exist once per node." + else + @node_properties[identity] = property end end + end - def clean sgf - sgf.gsub! "\\\\n\\\\r", '' - sgf.gsub! "\\\\r\\\\n", '' - sgf.gsub! "\\\\r", '' - sgf.gsub! "\\\\n", '' - sgf - end + def still_inside_node? + !NODE_DELIMITERS.include?(@sgf_stream.peek_skipping_whitespace) + end - def open_branch - @branches.unshift @current_node + def property_token_type identity + case identity.upcase + when "C" then SGF::CommentToken.new + when *LIST_IDENTITIES then SGF::MultiPropertyToken.new + else SGF::GenericPropertyToken.new end - - def close_branch - @current_node = @branches.shift - end - - def create_new_node - node = Node.new - @current_node.add_children node - @current_node = node - end - - def parse_node_data - @node_properties = {} - while still_inside_node? - parse_identity - parse_property - @node_properties[@identity] = @property - end - end - - def add_properties_to_current_node - @current_node.add_properties @node_properties - end - - def still_inside_node? - inside_a_node = false - while char = next_character - next if char[/\s/] - inside_a_node = !NODE_DELIMITERS.include?(char) - break - end - @stream.pos -= 1 if char - inside_a_node - end - - def parse_identity - @identity = "" - while char = next_character and char != "[" - @identity << char unless char == "\n" - end - end - - def parse_property - @property = "" - case @identity.upcase - when "C" then parse_comment - when *LIST_IDENTITIES then parse_multi_property - else parse_generic_property - end - end - - def parse_comment - while char = next_character and still_inside_comment? char - @property << char - end - @property.gsub! "\\]", "]" - end - - def parse_multi_property - while char = next_character and still_inside_multi_property? char - @property << char - end - @property = @property.gsub("][", ",").split(",") - end - - def parse_generic_property - while char = next_character and char != "]" - @property << char - end - end - - def still_inside_comment? char - char != "]" || (char == "]" && @property[-1..-1] == "\\") - end - - def still_inside_multi_property? char - return true if char != "]" - inside_multi_property = false - while char = next_character - next if char[/\s/] - inside_multi_property = char == "[" - break - end - @stream.pos -= 1 if char - inside_multi_property - end - - def next_character - !@stream.eof? && @stream.sysread(1) - end - end - end -