#!/usr/bin/env ruby -w
# encoding: UTF-8

#--
# Simple Declarative Language (SDL) for Ruby
# Copyright 2005 Ikayzo, inc.
#
# This program is free software. You can distribute or modify it under the
# terms of the GNU Lesser General Public License version 2.1 as published by
# the Free Software Foundation.
#
# This program is distributed AS IS and WITHOUT WARRANTY. OF ANY KIND,
# INCLUDING MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, contact the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#++

module SDL4R

  require 'stringio'
  require 'date'

  require 'sdl4r/sdl4r'
  require 'sdl4r/sdl_time_span'
  require 'sdl4r/sdl_binary'
  require 'sdl4r/tokenizer'
  require 'sdl4r/element'

  # Implementation of a pull parser for SDL designed after the model of Nokogiri::XML::Reader.
  #
  # The Reader consumes tokens from a Tokenizer and dispatches each of them to a handler chosen
  # according to the current parsing mode (:top, :tag_values, :tag_attributes, :tag_body). A call
  # to #read returns as soon as a handler reports that a node has been completed.
  #
  class Reader

    # Node type reported when a tag definition (name, values, attributes) has been read.
    TYPE_ELEMENT = :ELEMENT
    # Node type reported when the closing brace of a tag body has been read.
    TYPE_END_ELEMENT = :END_ELEMENT

    # Registers +handler+ in +map+ for every token type that starts a literal value.
    #
    # @private
    def self.add_values_handler(map, handler)
      map[:NULL] = handler
      map[:INTEGER] = handler
      map[:FLOAT] = handler
      map[:BOOLEAN] = handler
      map[:CHARACTER] = handler
      map[:INLINE_BACKQUOTE_STRING] = handler
      map[:INLINE_DOUBLE_QUOTE_STRING] = handler
      map[:MULTILINE_BACKQUOTE_STRING_START] = handler
      map[:MULTILINE_DOUBLE_QUOTE_STRING_START] = handler
      map[:INLINE_BINARY] = handler
      map[:MULTILINE_BINARY_START] = handler
      map[:DATE] = handler
      map[:TIME_OR_TIMESPAN] = handler
    end

    # @private
    @@SKIP_PROC = lambda { |reader| false } # skips current token

    # @private
    @@ON_SELF_CLOSING_TAG_PROG = lambda { |reader| reader.on_self_closing_tag }

    # @private
    @@comment_handler_set = {
      :INLINE_COMMENT => lambda { |reader| reader.on_simple_comment },
      :ONE_LINE_COMMENT => lambda { |reader| reader.on_simple_comment },
      :MULTILINE_COMMENT_START => lambda { |reader| reader.on_multiline_comment },
    }

    # Handlers that work the same at the top level or in any normal tag body.
    # @private
    @@common_tag_set = {
      :WHITESPACE => @@SKIP_PROC,
      :EOL => @@SKIP_PROC,
      :SEMICOLON => @@SKIP_PROC,
      :IDENTIFIER => lambda { |reader| reader.on_tag_start },
    }

    # Handler tables indexed by parsing mode, then by token type.
    #
    # The handlers are objects with #call() (like Proc, etc) that should return false if the
    # corresponding token is ignored, true otherwise.
    # @private
    #
    @@handler_sets = {}

    @@handler_sets[:top] = {
      :EOF => lambda { |reader| reader.on_eof },
    }
    @@handler_sets[:top].merge!(@@common_tag_set)
    @@handler_sets[:top].merge!(@@comment_handler_set)
    add_values_handler(@@handler_sets[:top], lambda { |reader| reader.on_anonymous_value })

    @@handler_sets[:tag_values] = {
      :WHITESPACE => @@SKIP_PROC,
      :LINE_CONTINUATION => @@SKIP_PROC,
      :IDENTIFIER => lambda { |reader| reader.on_attribute },
      :EOL => @@ON_SELF_CLOSING_TAG_PROG,
      :SEMICOLON => @@ON_SELF_CLOSING_TAG_PROG,
      :BLOCK_START => lambda { |reader| reader.on_tag_body_start },
      :EOF => @@ON_SELF_CLOSING_TAG_PROG,
    }
    @@handler_sets[:tag_values].merge!(@@comment_handler_set)
    add_values_handler(@@handler_sets[:tag_values], lambda { |reader| reader.on_value })

    # Same as :tag_values except that literal values are no longer accepted (no values handler
    # is registered), so a value token in this mode is reported as unexpected by #read.
    @@handler_sets[:tag_attributes] = {
      :WHITESPACE => @@SKIP_PROC,
      :LINE_CONTINUATION => @@SKIP_PROC,
      :IDENTIFIER => lambda { |reader| reader.on_attribute },
      :EOL => @@ON_SELF_CLOSING_TAG_PROG,
      :SEMICOLON => @@ON_SELF_CLOSING_TAG_PROG,
      :BLOCK_START => lambda { |reader| reader.on_tag_body_start },
      :EOF => @@ON_SELF_CLOSING_TAG_PROG,
    }
    @@handler_sets[:tag_attributes].merge!(@@comment_handler_set)

    @@handler_sets[:tag_body] = {
      :BLOCK_END => lambda { |reader| reader.on_tag_body_end },
    }
    @@handler_sets[:tag_body].merge!(@@common_tag_set)
    @@handler_sets[:tag_body].merge!(@@comment_handler_set)
    add_values_handler(@@handler_sets[:tag_body], lambda { |reader| reader.on_anonymous_value })

    @@handler_sets[:eof] = {}

    # Converters from the text of a value token to the corresponding Ruby object, indexed by
    # token type. Each one is called with the token text and the Reader.
    # @private
    @@value_handlers = {
      :NULL => lambda { |s, reader| nil },
      :INTEGER => lambda { |s, reader| reader.parse_integer(s) },
      :FLOAT => lambda { |s, reader| reader.parse_float(s) },
      :BOOLEAN => lambda { |s, reader| (s =~ /\A(?:true|on)\Z/) ? true : false },
      :CHARACTER => lambda { |s, reader| reader.parse_character(s) },
      :INLINE_BACKQUOTE_STRING => lambda { |s, reader| s },
      :INLINE_DOUBLE_QUOTE_STRING => lambda { |s, reader| reader.parse_double_quote_string(s) },
      :MULTILINE_BACKQUOTE_STRING_START =>
        lambda { |s, reader| reader.parse_multiline_backquote_string(s) },
      :MULTILINE_DOUBLE_QUOTE_STRING_START =>
        lambda { |s, reader| reader.parse_multiline_double_quote_string(s) },
      :INLINE_BINARY => lambda { |s, reader| SdlBinary.decode64(s) },
      :MULTILINE_BINARY_START => lambda { |s, reader| reader.parse_multiline_binary(s) },
      :DATE => lambda { |s, reader| reader.parse_date(s) },
      :TIME_OR_TIMESPAN => lambda { |s, reader| reader.parse_time_span(s) },
    }

    # Type of the traversed SDL node (e.g. TYPE_ELEMENT).
    attr_reader :node_type

    # Prefix (namespace) of the traversed SDL node.
    attr_reader :prefix

    # Name of the traversed SDL node.
    attr_reader :name

    # Depth of the current SDL node. Depth of top nodes is 1 (0 would be the root that the Reader
    # doesn't traverse).
    attr_reader :depth

    # @param io the source of SDL text; must respond to #gets
    # @raise [ArgumentError] if +io+ is nil or doesn't respond to #gets
    def initialize(io)
      raise ArgumentError, "io == nil" if io.nil?
      raise ArgumentError, "io is not an IO" unless io.respond_to?(:gets)

      @io = io
      @tokenizer = Tokenizer.new(@io)

      @element = nil
      @element_pool = []
      @depth = 1

      clear_node()

      set_mode(:top)
    end

    # @return [Array] a copy of the array of the attributes structured as follows:
    #   [ [["ns1", "attr1"], 123], [["", "attr2"], true] ]
    #   or nil if no element has been read yet.
    def attributes
      @element ? @element.attributes.clone : nil
    end

    # @return the value of the specified attribute, nil if there is no such attribute (or no
    #   current element).
    #
    # @overload attribute(name)
    # @overload attribute(prefix, name)
    def attribute(prefix, name = nil)
      return nil unless @element

      if name
        prefix, name = prefix.to_s, name.to_s
      else
        # Only one argument was given: it is the name and the prefix is empty.
        prefix, name = '', prefix.to_s
      end

      @element.attributes.each do |attr|
        return attr[1] if attr[0][0] == prefix && attr[0][1] == name
      end

      return nil
    end

    # @return the attribute entry stored at the specified index in the attribute array of the
    #   current element (see #attributes for the structure of an entry), nil if there is no
    #   current element.
    def attribute_at(index)
      if @element
        @element.attributes[index]
      else
        nil
      end
    end

    # @return [Integer] number of attributes in the current element
    def attribute_count
      @element ? @element.attributes.size : 0
    end

    # @return [boolean] whether the current element has attributes (false if there is no current
    #   element).
    def attributes?
      @element ? @element.attributes.size > 0 : false
    end

    # Calls the given block for each encountered Tag. The block is called when the Tag definition
    # is complete.
    #
    # @param [boolean] only_top_tags if true only top Tags are enumerated
    # @yield [Tag] called at each Tag
    # @return [Enumerator, nil] an Enumerator if no block is given
    #
    def each_tag(only_top_tags = false)
      return enum_for(:each_tag, only_top_tags) unless block_given?

      stack = [] # Tags whose body is currently open, outermost first
      tag = nil # Only used during definition (values + attributes)

      while (node = read)
        case node.node_type
        when TYPE_ELEMENT
          tag = Tag.new @element.prefix, @element.name

          node.attributes.each do |attribute|
            tag.set_attribute(attribute[0][0], attribute[0][1], attribute[1])
          end

          values = node.values
          tag.values = values if values

          stack.last.add_child(tag) unless stack.empty?

          if node.self_closing?
            yield tag if !only_top_tags or depth <= 1
          else
            # The Tag has a body: it will be yielded when its end is reached.
            stack << tag
          end
          tag = nil # definition ended here

        when TYPE_END_ELEMENT
          tag = stack.pop
          yield tag if !only_top_tags or depth <= 1
        end
      end
    end

    # @return [boolean] whether the current element was ended without a body (false if there is
    #   no current element).
    def self_closing?
      @element ? @element.self_closing : false
    end

    # Resets the node type, prefix and name exposed by this Reader.
    def clear_node
      @node_type = nil
      @prefix = nil
      @name = nil
    end
    private :clear_node

    # @return the values of the current node, nil if there are none.
    def values
      if @element
        values = @element.values
        values.empty? ? nil : values.clone
      else
        @value
      end
    end
    alias_method :value, :values

    # @return [boolean] whether the current node has at least one value.
    def values?
      if @element
        !@element.values.empty?
      else
        !@value.nil?
      end
    end
    alias_method :value?, :values?

    # @return [Reader] a new Reader pulling SDL from the given IO.
    def self.from_io(io)
      self.new(io)
    end

    # @return [Reader] a new Reader pulling SDL from the given String.
    def self.from_memory(s)
      self.new(StringIO.new(s))
    end

    # Enumerates all the parsed nodes and calls the given block.
    #
    # @yield [Reader] the current node
    # @return [Enumerator, nil] an Enumerator if no block is given
    #
    # @example
    #   open("sample.sdl") do |io|
    #     SDL4R::Reader.from_io(io).each do |node|
    #       puts node.node_type
    #     end
    #   end
    #
    def each(&block)
      return enum_for(:each) unless block

      while (node = self.read)
        block.call(node)
      end
    end

    # Reads the next node in the SDL structure.
    #
    # @example
    #   open("sample.sdl") do |io|
    #     reader = SDL4R::Reader.from_io(io)
    #     while node = reader.read
    #       puts node.node_type
    #     end
    #   end
    #
    # @return [Reader] returns a Reader if a new node has been reached or +nil+ if the end of file
    #   has been reached.
    def read
      clear_node
      node = nil

      while @tokenizer.read
        handler = @handler_set[@tokenizer.token_type]
        unless handler
          # The token is not allowed in the current mode.
          raise_unexpected_token
        end

        if handler.call(self)
          node = self if @node_type # otherwise, we reached the end of the file
          break
        end
      end

      node
    end

    # Reports the current token of the tokenizer as unexpected through
    # Tokenizer#raise_parse_error.
    #
    # @private
    def raise_unexpected_token
      @tokenizer.raise_parse_error(
        "unexpected token #{@tokenizer.token_type} #{@tokenizer.token.inspect}",
        @tokenizer.token_line_no,
        @tokenizer.token_pos)
    end

    # Switches to the given parsing mode, i.e. to the corresponding set of token handlers.
    #
    # @raise [ArgumentError] if +mode+ is not a known mode
    def set_mode(mode)
      handler_set = @@handler_sets[mode]
      raise ArgumentError, "unknown mode #{mode.to_s}" unless handler_set
      @mode = mode
      @handler_set = handler_set
    end
    protected :set_mode

    # Creates and returns the object representing a datetime (calls SDL4R#new_time by default).
    # Can be overridden.
    #
    #   def new_time(year, month, day, hour, min, sec, msec, timezone_code)
    #     Time.utc(year, month, day, hour, min, sec, msec, timezone_code)
    #   end
    #
    def new_time(year, month, day, hour, min, sec, msec, timezone_code)
      SDL4R::new_time(year, month, day, hour, min, sec, msec, timezone_code)
    end

    # Comments are skipped: no node is reported.
    # @private
    def on_simple_comment # :nodoc:
#      @node_type = TYPE_COMMENT
      false
    end

    # Stops the current #read without reporting a node.
    # @private
    def on_eof # :nodoc:
      @node_type = nil
      true
    end

    # Consumes the tokens of a multiline comment up to its end. No node is reported.
    # @private
    def on_multiline_comment # :nodoc:
#      @node_type = TYPE_COMMENT
      # Work on a copy: appending to the token itself would alter the tokenizer's own String.
      @value = @tokenizer.token.dup
      while @tokenizer.read
        case @tokenizer.token_type
        when :EOL
          @value << ?\n
        when :MULTILINE_COMMENT_PART
          @value << @tokenizer.token
        when :MULTILINE_COMMENT_END
          @value << @tokenizer.token
          break
        else
          raise_unexpected_token
        end
      end
      false
    end

    # Starts a new element named after the current identifier token. The node is only reported
    # once the definition is complete (see #on_self_closing_tag and #on_tag_body_start).
    # @private
    def on_tag_start # :nodoc:
      read_name
      set_mode :tag_values
      @element = Element.new @prefix, @name
      false
    end

    # Ends the definition of an element that has no body and reports it.
    # @private
    def on_self_closing_tag # :nodoc:
      @node_type = TYPE_ELEMENT
      # #read_name might have overwritten these while reading attribute names.
      @prefix = @element.prefix
      @name = @element.name
      @element.self_closing = true
      set_mode(@depth <= 1 ? :top : :tag_body)
      true
    end

    # Reads a "name=value" attribute and adds it to the current element.
    # @private
    def on_attribute # :nodoc:
      read_name
      read_equal
      read_value
      set_mode :tag_attributes
      @element.add_attribute(@prefix, @name, @value)
      false
    end

    # Reads a value and adds it to the current element.
    # @private
    def on_value # :nodoc:
      read_value
      set_mode :tag_values
      @element.add_value(@value)
      false
    end

    # Starts an anonymous element whose first value is the current token.
    # Should only be called from :top or :tag_body modes.
    # @private
    def on_anonymous_value # :nodoc:
      set_mode :tag_values
      @element = Element.new '', SDL4R::ANONYMOUS_TAG_NAME
      on_value
    end

    # Reports the current element when the start of its body is reached.
    # @private
    def on_tag_body_start # :nodoc:
      @node_type = TYPE_ELEMENT
      # #read_name might have overwritten these while reading attribute names.
      @prefix = @element.prefix
      @name = @element.name
      @depth += 1
      set_mode :tag_body
      true
    end

    # Reports a TYPE_END_ELEMENT node when the end of a tag body is reached.
    # @private
    def on_tag_body_end # :nodoc:
      if @depth <= 1
        raise "unexpected end of tag"
      end

      clear_node
      @node_type = TYPE_END_ELEMENT
      @depth -= 1
      set_mode(@depth <= 1 ? :top : :tag_body)
      true
    end

    # Interprets the escape sequences (\\, \", \n, \r, \t) of the content of a double-quoted
    # string literal. Any other escape sequence or a trailing backslash is reported through
    # Tokenizer#raise_parse_error.
    #
    # @private
    def parse_double_quote_string(s)
      return s if s.empty?

      string = ""
      escaped = false

      s.each_char do |c|
        if escaped
          escaped = false

          case c
          when "\\", "\""
            string << c
          when "n"
            string << ?\n
          when "r"
            string << ?\r
          when "t"
            string << ?\t
          else
            @tokenizer.raise_parse_error("Illegal escape character in string literal: '#{c}'.")
          end

        elsif c == "\\"
          escaped = true
        else
          string << c
        end
      end

      @tokenizer.raise_parse_error("orphan backslash") if escaped

      string
    end

    # Concatenates +first_string+ with the following tokens of type +part_token_type+ until a
    # token of type +end_token_type+ (included) is met. End-of-line tokens are skipped; any other
    # token is reported as unexpected.
    #
    # @private
    def parse_multiline_string(first_string, part_token_type, end_token_type)
      string = ""
      string << first_string

      loop do
        case @tokenizer.read
        when :EOL
          # skip
        when part_token_type
          string << @tokenizer.token
        when end_token_type
          string << @tokenizer.token
          break
        else
          raise_unexpected_token
        end
      end

      string
    end
    private :parse_multiline_string

    # @private
    def parse_multiline_backquote_string(s)
      parse_multiline_string s, :MULTILINE_BACKQUOTE_STRING_PART, :MULTILINE_BACKQUOTE_STRING_END
    end

    # @private
    def parse_multiline_double_quote_string(s)
      parse_double_quote_string(
        parse_multiline_string(
          s, :MULTILINE_DOUBLE_QUOTE_STRING_PART, :MULTILINE_DOUBLE_QUOTE_STRING_END))
    end

    # @private
    def parse_multiline_binary(s)
      literal = parse_multiline_string s, :MULTILINE_BINARY_PART, :MULTILINE_BINARY_END
      return SdlBinary.decode64(literal)
    end

    # Converts the content of a character literal (plain character or one of the escape
    # sequences \\, \', \n, \r, \t) into the corresponding one-character String.
    #
    # @private
    def parse_character(s)
      case s
      when /\A.\Z/
        s
      when "\\\\"
        "\\"
      when "\\'"
        "'"
      when "\\n"
        "\n"
      when "\\r"
        "\r"
      when "\\t"
        "\t"
      else
        raise "illegal character literal #{s.inspect}"
      end
    end

    # Converts an integer literal, with or without a "L" suffix, into an Integer.
    #
    # @private
    def parse_integer(s)
      if s =~ /\A([^L]+)L\Z/i
        return Integer($1)
      else
        return Integer(s)
      end
    end

    # Converts a float literal into a BigDecimal ("BD" suffix) or a Float ("F"/"D" suffix or no
    # suffix). A literal that can't be converted is reported through Tokenizer#raise_parse_error
    # whatever its suffix.
    #
    # @private
    def parse_float(s)
      if s =~ /\A([^BDF]+)BD\Z/i
        literal = $1
        big_decimal = true
      elsif s =~ /\A([^BDF]+)[FD]\Z/i
        literal = $1
        big_decimal = false
      else
        literal = s
        big_decimal = false
      end

      begin
        big_decimal ? BigDecimal(literal) : Float(literal)
      rescue ArgumentError, TypeError
        @tokenizer.raise_parse_error("not a float '#{literal}'")
      end
    end

    # Parses the +literal+ into a returned Date object. If the next token (whitespaces aside) is
    # a time, it is consumed too and the result of #new_time is returned instead.
    #
    # Raises an ArgumentError if +literal+ has a bad format.
    #
    # @private
    def parse_date(literal)
      # here, we're being stricter than strptime() alone as we forbid trailing chars (also faster)
      if literal =~ /\A(-?\d+)\/(\d+)\/(\d+)\Z/
        date_year = $1.to_i
        date_month = $2.to_i
        date_day = $3.to_i

        skip_whitespaces(false)

        # Check whether the next token is the time part
        if @tokenizer.token_type == :TIME_OR_TIMESPAN
          # Is it a time or timespan?
          day, hour, min, sec, msec, zone =
            parse_time_span_and_time_zone(@tokenizer.token, true, true)

          if day
            # A day specification means a timespan, i.e. a separate value: give it back.
            @tokenizer.unread
            return Date.civil(date_year, date_month, date_day)
          else
            return new_time(date_year, date_month, date_day, hour, min, sec, msec, zone)
          end

        else
          @tokenizer.unread
          return Date.civil(date_year, date_month, date_day)
        end

      else
        raise ArgumentError, "Malformed Date <#{literal}>"
      end
    end

    # Parses +literal+ (String) into the corresponding SDLTimeSpan, which is then
    # returned.
    #
    # Raises an ArgumentError if the literal is not a correct timespan literal.
    #
    # @private
    def parse_time_span(literal)
      days, hours, minutes, seconds, milliseconds, zone_code =
        parse_time_span_and_time_zone(literal, true, false)

      if zone_code
        @tokenizer.raise_parse_error("got a time when expecting a timespan: \"#{literal}\"")
      end

      return SDL4R::SdlTimeSpan.new(days || 0, hours, minutes, seconds, milliseconds)
    end

    private

    # Parses the given literal into a returned array
    # [days, hours, minutes, seconds, milliseconds, zone_code].
    # 'days', 'hours', 'minutes', 'seconds', 'milliseconds' are integers.
    # 'days' is +nil+ if not specified in +literal+.
    # 'seconds' and 'milliseconds' are equal to 0 if they're not specified in +literal+.
    # A fraction of seconds longer than 3 digits is truncated to the millisecond.
    # 'zone_code' (string) is equal to nil if not specified.
    #
    # +allowDays+ indicates whether the specification of days is allowed
    # in +literal+
    # +allowTimeZone+ indicates whether the specification of the timeZone is
    # allowed in +literal+
    #
    # All components are returned disregarding the values of +allowDays+ and
    # +allowTimeZone+.
    #
    # Raises an ArgumentError if +literal+ has a bad format.
    def parse_time_span_and_time_zone(literal, allowDays, allowTimeZone)
      overall_sign = (literal =~ /^-/) ? -1 : +1

      if literal =~ /\A(([+\-]?\d+)d:)/
        if allowDays
          days = Integer($2)
          time_part = literal[($1.length)..-1]
        else
          # detected a day specification in a pure time literal
          raise ArgumentError, "unexpected day specification in #{literal}"
        end
      else
        days = nil
        time_part = literal
      end

      # We have to parse the string ourselves because AFAIK :
      # - strptime() can't parse milliseconds
      # - strptime() can't parse the time zone custom offset (CET+02:30)
      # - strptime() accepts trailing chars
      #   (e.g. "12:24-xyz@" ==> "xyz@" is obviously wrong but strptime()
      #    won't mind)
      time_match =
        /\A([+-]?\d+):(\d+)(?::(\d+)(?:\.(\d+))?)?
         (?:-([a-zA-Z0-9\/_]+(?:[+\-]\d+(?::\d+)?)?))?\Z/ix.match(time_part)

      raise ArgumentError, "bad time component : #{literal}" unless time_match

      hours_part, minutes_part, seconds_part, fraction_part, zone_code = time_match.captures

      hours = hours_part.to_i
      minutes = minutes_part.to_i
      seconds = seconds_part ? seconds_part.to_i : 0
      # "5" means 500ms and "1234" means 123ms: pad or cut the fraction to exactly 3 digits.
      milliseconds = fraction_part ? fraction_part.ljust(3, '0')[0, 3].to_i : 0

      if zone_code and not allowTimeZone
        raise ArgumentError, "unexpected time zone specification in #{literal}"
      end

      if not allowDays and hours_part =~ /\A[+-]/
        # unexpected timeSpan syntax
        raise ArgumentError, "unexpected sign on hours : #{literal}"
      end

      # take the sign into account
      if overall_sign == -1
        hours = -hours if days # otherwise the sign is already applied to the hours
        minutes = -minutes
        seconds = -seconds
        milliseconds = -milliseconds
      end

      return [ days, hours, minutes, seconds, milliseconds, zone_code ]
    end

    # Reads tokens until one that is not a whitespace (or a line continuation if
    # +allow_line_continuations+ is true) is met, or the tokenizer has nothing more to read.
    # The token that stopped the loop is left as the current token of the tokenizer.
    def skip_whitespaces(allow_line_continuations = false)
      while (token_type = @tokenizer.read)
        case token_type
        when :WHITESPACE
          # skip
        when :LINE_CONTINUATION
          break unless allow_line_continuations
        else
          break
        end
      end
    end

    # Reads the '=' of an attribute and moves to the first token of its value.
    # @private
    def read_equal
      skip_whitespaces(false)
      unless @tokenizer.token_type == :EQUAL
        raise_unexpected_token
      end
      skip_whitespaces(true)
    end

    # Reads a "name" or "prefix:name" starting at the current identifier token and stores the
    # result in @prefix and @name.
    #
    # @return the name that was read
    # @private
    def read_name
      @name = @tokenizer.token

      if @tokenizer.read == :COLON
        # Namespace + Name (except syntax error)
        if @tokenizer.read == :IDENTIFIER
          @prefix = @name
          @name = @tokenizer.token
        else
          raise_unexpected_token
        end

      else
        # Just a Name, it seems
        @tokenizer.unread
        @prefix = ''
      end

      @name
    end

    # Converts the current token into a value stored in @value, using the converter registered
    # for its type. A token that doesn't start a value is reported as unexpected.
    #
    # @return the value that was read
    # @private
    def read_value
      handler = @@value_handlers[@tokenizer.token_type]
      raise_unexpected_token unless handler
      @value = handler.call(@tokenizer.token, self)
      @value
    end

  end

end