lib/sgf/parser.rb in SgfParser-1.0.0 vs lib/sgf/parser.rb in SgfParser-2.0.0
- old
+ new
@@ -1,120 +1,167 @@
require 'stringio'
module SGF
+
+ # The parser returns an SGF::Tree representation of the SGF file:
+ #   parser = SGF::Parser.new
+ #   tree = parser.parse sgf_in_string_form
class Parser
- def initialize sgf
- @sgf = stringified(sgf)
- @tree = Tree.new sgf
# Characters with structural meaning in the SGF text: ";" starts a node,
# "(" and ")" open and close a branch, "[" and "]" bracket a property value.
+ NEW_NODE = ";"
+ BRANCHING = ["(", ")"]
+ PROPERTY = ["[", "]"]
# Any of these characters ends the property list of the node being read
# (checked by still_inside_node?).
+ NODE_DELIMITERS = [NEW_NODE].concat BRANCHING
# Identities that parse_property hands to parse_multi_property, so their
# value is stored as an Array of values rather than a single String.
+ LIST_IDENTITIES = ["AW", "AB", "AE", "AR", "CR", "DD",
+ "LB", "LN", "MA", "SL", "SQ", "TR", "VW",
+ "TB", "TW"]
+
+ # This takes as argument an SGF and returns an SGF::Tree object
+ # It accepts a local path (String), a stringified SGF (String),
+ # or a file handle (File).
+ # The second argument is optional, in case you don't want this to raise errors.
+ # You probably shouldn't use it, but who's gonna stop you?
# With strict_parsing left at true, input that does not start with "(;"
# (ignoring whitespace) raises SGF::MalformedDataError before any parsing.
+ def parse sgf, strict_parsing = true
+ @strict_parsing = strict_parsing
+ @stream = streamably_stringify sgf
+ @tree = Tree.new
@root = @tree.root
# Parsing starts at the root; @branches remembers the node each "(" was opened on.
+ @current_node = @root
+ @branches = []
+ until @stream.eof?
+ case next_character
+ when "(" then open_branch
+ when ";" then
+ create_new_node
+ parse_node_data
+ add_properties_to_current_node
+ when ")" then close_branch
# Any other character found between nodes is skipped.
+ else next
+ end
+ end
+ @tree
end
- def stringified sgf
- File.exist?(sgf) ? File.read(sgf) : sgf
+ private
+
# Turns the argument given to parse into a read-only StringIO over the SGF text.
# A File is read; a String naming an existing path is replaced by that file's
# content; any other String is used as the SGF text itself.
# When @strict_parsing is set the text is validated first, which may raise
# SGF::MalformedDataError.
+ def streamably_stringify sgf
# instance_of? matches File exactly; other IO-like objects are not read here.
+ sgf = sgf.read if sgf.instance_of?(File)
# NOTE(review): text just read from a File also goes through this path check.
+ sgf = File.read(sgf) if File.exist?(sgf)
+
+ check_for_errors_before_parsing sgf if @strict_parsing
+ StringIO.new clean(sgf), 'r'
end
- def parse
- while char = next_character
- case char
- when '(' then store_branch
- when ')' then fetch_branch
- when ';' then store_node_and_create_new_node
- when '[' then get_and_store_property
- else store_character(char)
- end
# Raises SGF::MalformedDataError unless the text starts with "(" followed by ";",
# allowing whitespace before and between the two characters.
# NOTE(review): the message quotes string[0..1], i.e. the first two characters of
# the text, which can be whitespace rather than the first two non-whitespace ones.
+ def check_for_errors_before_parsing string
+ msg = "The first two non-whitespace characters of the string should be (;"
+ unless string[/\A\s*\(\s*;/]
+ msg << " but they were #{string[0..1]} instead."
+ raise(SGF::MalformedDataError, msg)
end
- @tree
end
- def next_character
- character_available? && @stream.sysread(1)
# Strips escaped line-break sequences from the SGF text and returns the text.
# gsub! works in place, so the String passed in is itself modified.
# NOTE(review): as written, each pattern is two backslash characters followed by
# the letter n or r, not a real newline or carriage return - confirm the intent.
+ def clean sgf
+ sgf.gsub! "\\\\n\\\\r", ''
+ sgf.gsub! "\\\\r\\\\n", ''
+ sgf.gsub! "\\\\r", ''
+ sgf.gsub! "\\\\n", ''
+ sgf
end
- def character_available?
- @stream ||= StringIO.new clean_string, 'r'
- !@stream.eof?
# Called on "(": remembers the current node at the front of @branches so that
# close_branch can return to it.
+ def open_branch
+ @branches.unshift @current_node
end
- def clean_string
- @sgf.gsub! "\\\\n\\\\r", ""
- @sgf.gsub! "\\\\r\\\\n", ""
- @sgf.gsub! "\\\\r", ""
- @sgf.gsub! "\\\\n", ""
- @sgf
- end
# Called on ")": makes the most recently remembered branch point the current node.
# NOTE(review): with no matching "(", shift returns nil and @current_node becomes nil.
+ def close_branch
+ @current_node = @branches.shift
+ end
- def store_branch
- @branches ||= []
- @branches.unshift @current_node
# Called on ";": creates a Node, attaches it as a child of the current node,
# and makes it the current node.
+ def create_new_node
+ node = Node.new
+ @current_node.add_children node
+ @current_node = node
end
- def current_node
- @current_node ||= @root
- end
# Collects the properties of the node being read into @node_properties,
# one identity/value pair per loop iteration, until a node delimiter is reached.
+ def parse_node_data
+ @node_properties = {}
+ while still_inside_node?
+ parse_identity
+ parse_property
# An identity that appears twice in the same node overwrites the earlier value.
+ @node_properties[@identity] = @property
+ end
+ end
- def fetch_branch
- @current_node = @branches.shift
- clear_temporary_data
# Hands the properties gathered by parse_node_data to the current node.
+ def add_properties_to_current_node
+ @current_node.add_properties @node_properties
end
- def store_node_and_create_new_node
- parent = current_node
- @current_node = Node.new :parent => parent
- parent.add_properties content
- parent.add_children @current_node
- clear_temporary_data
# Looks ahead past whitespace and reports whether the next character still belongs
# to the current node, i.e. is not one of NODE_DELIMITERS.
# Returns false when the stream is exhausted.
+ def still_inside_node?
+ inside_a_node = false
+ while char = next_character
+ next if char[/\s/]
+ inside_a_node = !NODE_DELIMITERS.include?(char)
+ break
+ end
# Step back one character so the caller re-reads the character just inspected;
# char is false here when the stream ran out, in which case nothing is rewound.
+ @stream.pos -= 1 if char
+ inside_a_node
end
- def get_and_store_property
- @content[@identity] ||= ""
- @content[@identity] << get_property
# Reads the property identity into @identity: every character up to the opening
# "[" (which is consumed) or the end of the stream.
+ def parse_identity
@identity = ""
+ while char = next_character and char != "["
# Only "\n" is dropped; any other whitespace read here stays in the identity.
+ @identity << char unless char == "\n"
+ end
end
- def get_property
- buffer = ""
- while char = next_character
- case char
- when "]" then break unless multiple_properties?
- when "\\" then
- char << next_character
- char = "]" if char == "\\]"
- end
# Resets @property and reads the value for @identity with the reader that fits it:
# "C" uses parse_comment, LIST_IDENTITIES use parse_multi_property, anything else
# uses parse_generic_property. The identity is compared case-insensitively.
+ def parse_property
+ @property = ""
+ case @identity.upcase
+ when "C" then parse_comment
+ when *LIST_IDENTITIES then parse_multi_property
+ else parse_generic_property
+ end
+ end
- buffer << char
# Reads a comment value into @property up to the first "]" that is not preceded
# by a backslash (or the end of the stream); that closing "]" is consumed.
+ def parse_comment
+ while char = next_character and still_inside_comment? char
+ @property << char
end
- "[#{buffer}]"
# Escaped brackets were copied verbatim above; turn each "\]" back into "]".
+ @property.gsub! "\\]", "]"
end
- def multiple_properties?
- multiple_properties = false
- if char = next_character
- char = next_character if char == "\n"
- if char == "["
- multiple_properties = true
- end
- @stream.pos -= 1
- multiple_properties
# Reads a run of bracketed values (e.g. "aa][bb" once the first "[" is consumed)
# into @property and then replaces @property with an Array of the values.
+ def parse_multi_property
+ while char = next_character and still_inside_multi_property? char
+ @property << char
end
# "][" separators become commas before splitting, so a value that itself
# contains a comma is split as well.
+ @property = @property.gsub("][", ",").split(",")
end
- def store_character(char)
- @identity << char unless char == "\n"
# Reads a single value into @property up to the first "]" (consumed) or the end
# of the stream. Unlike parse_comment, a backslash before "]" is not treated as
# an escape here.
+ def parse_generic_property
+ while char = next_character and char != "]"
+ @property << char
+ end
end
- def clear_temporary_data
- @content.clear
- @identity = ""
# True while char is still part of the comment: any character other than "]",
# or a "]" whose preceding character in @property is a backslash.
# Only that single preceding character is inspected.
+ def still_inside_comment? char
+ char != "]" || (char == "]" && @property[-1..-1] == "\\")
end
- def content
- @content ||= {}
# True while char is still part of a multi-value property. Any character other
# than "]" qualifies; a "]" qualifies only when the next non-whitespace character
# is "[", meaning another value follows.
+ def still_inside_multi_property? char
+ return true if char != "]"
+ inside_multi_property = false
+ while char = next_character
+ next if char[/\s/]
+ inside_multi_property = char == "["
+ break
+ end
# Step back one character so the look-ahead character is read again by the caller;
# whitespace skipped above stays consumed. Nothing is rewound at end of stream.
+ @stream.pos -= 1 if char
+ inside_multi_property
end
- def identity
- @identity ||= ""
# Returns the next character of @stream as a one-character String,
# or false once the stream is exhausted.
+ def next_character
+ !@stream.eof? && @stream.sysread(1)
end
end
+
end