# the scanner/lexer

require 'forwardable'
require 'strscan'
require 'puppet'
require 'puppet/util/methodhelper'

module Puppet
  class LexError < RuntimeError; end
end

class Puppet::Pops::Parser::Lexer
  extend Forwardable

  attr_reader :file, :lexing_context, :token_queue
  attr_reader :locator
  attr_accessor :indefine
  alias :indefine? :indefine

  def lex_error msg
    raise Puppet::LexError.new(msg)
  end

  class Token
    ALWAYS_ACCEPTABLE = Proc.new { |context| true }

    include Puppet::Util::MethodHelper
    attr_accessor :regex, :name, :string, :skip, :skip_text
    alias skip? skip

    # @overload initialize(string)
    #   @param string [String] a literal string token matcher
    #   @param name [Symbol] the token name (what it is known as in the grammar)
    #   @param options [Hash] see {#set_options}
    # @overload initialize(regex)
    #   @param regex [Regexp] a regular expression token text matcher
    #   @param name [Symbol] the token name (what it is known as in the grammar)
    #   @param options [Hash] see {#set_options}
    #
    def initialize(string_or_regex, name, options = {})
      if string_or_regex.is_a?(String)
        @name, @string = name, string_or_regex
        @regex = Regexp.new(Regexp.escape(string_or_regex))
      else
        @name, @regex = name, string_or_regex
      end
      set_options(options)
      @acceptable_when = ALWAYS_ACCEPTABLE
    end

    # @return [String] human readable token reference; the String if literal, else the token name
    def to_s
      string or @name.to_s
    end

    # @return [Boolean] whether the token is acceptable in the given context or not.
    #   The default implementation always returns true.
    # @param context [Hash] the lexing context
    #
    def acceptable?(context={})
      @acceptable_when.call(context)
    end

    # Defines when the token is able to match.
    # This provides context that cannot be expressed otherwise, such as feature flags.
    #
    # @param block [Proc] a proc that given a context returns a boolean
    def acceptable_when(block)
      @acceptable_when = block
    end
  end

  # Maintains a list of tokens.
  class TokenList
    extend Forwardable

    attr_reader :regex_tokens, :string_tokens
    def_delegator :@tokens, :[]

    # Adds a new token to the set of recognized tokens
    # @param name [Symbol] the token name
    # @param regex [Regexp, String] source text token matcher, a literal string or regular expression
    # @param options [Hash] see {Token#set_options}
    # @param block [Proc] optional block set as the created token's `convert` method
    # @raise [ArgumentError] if a token with the given name is already defined
    #
    def add_token(name, regex, options = {}, &block)
      raise(ArgumentError, "Token #{name} already exists") if @tokens.include?(name)
      token = Token.new(regex, name, options)
      @tokens[token.name] = token
      if token.string
        @string_tokens << token
        @tokens_by_string[token.string] = token
      else
        @regex_tokens << token
      end

      token.meta_def(:convert, &block) if block_given?
      token
    end

    # Creates an empty token list
    #
    def initialize
      @tokens = {}
      @regex_tokens = []
      @string_tokens = []
      @tokens_by_string = {}
    end

    # Look up a token by its literal (match) value, rather than name.
    # @param string [String] the literal match string
    # @return [Token, nil] the token for the given string, or nil if none is defined
    def lookup(string)
      @tokens_by_string[string]
    end

    # Adds tokens from a hash where the key is a matcher (literal string or regexp) and the
    # value is the token's name
    # @param hash [Hash<{String => Symbol}>, Hash<{Regexp => Symbol}>] maps token text matcher to token name
    # @return [void]
    #
    def add_tokens(hash)
      hash.each do |regex, name|
        add_token(name, regex)
      end
    end

    # Sort literal (string-) tokens by length, so we know once we match, we're done.
    # This helps avoid the O(n^2) nature of token matching.
    # The tokens are sorted in place.
    # @return [void]
    def sort_tokens
      @string_tokens.sort! { |a, b| b.string.length <=> a.string.length }
    end

    # Yield each token name and value in turn.
    def each
      @tokens.each {|name, value| yield name, value }
    end
  end
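  # Illustrative sketch (not part of the lexer itself): how a TokenList is
  # built, and how a Token can be gated on the lexing context via
  # acceptable_when/acceptable?. The :HEX token and the :hex_enabled context
  # key are hypothetical, chosen only to demonstrate the contract above.
  #
  #   list = TokenList.new
  #   list.add_tokens('+' => :PLUS, '==' => :ISEQUAL)
  #   hex = list.add_token(:HEX, %r{0[xX][0-9A-Fa-f]+})
  #   hex.acceptable_when(Proc.new { |context| !!context[:hex_enabled] })
  #
  #   list[:PLUS].string                     # => "+"
  #   list.lookup('==').name                 # => :ISEQUAL
  #   hex.acceptable?(:hex_enabled => true)  # => true
  #   hex.acceptable?                        # => false (flag not set in context)
  #   list.sort_tokens                       # longest literals first: '==' before '+'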
  TOKENS = TokenList.new
  TOKENS.add_tokens(
    '['   => :LBRACK,
    ']'   => :RBRACK,
  # '{'   => :LBRACE, # Specialized to handle lambda
    '}'   => :RBRACE,
    '('   => :LPAREN,
    ')'   => :RPAREN,
    '='   => :EQUALS,
    '+='  => :APPENDS,
    '=='  => :ISEQUAL,
    '>='  => :GREATEREQUAL,
    '>'   => :GREATERTHAN,
    '<'   => :LESSTHAN,
    '<='  => :LESSEQUAL,
    '!='  => :NOTEQUAL,
    '!'   => :NOT,
    ','   => :COMMA,
    '.'   => :DOT,
    ':'   => :COLON,
    '@'   => :AT,
    '|'   => :PIPE,
    '<<|' => :LLCOLLECT,
    '|>>' => :RRCOLLECT,
    '->'  => :IN_EDGE,
    '<-'  => :OUT_EDGE,
    '~>'  => :IN_EDGE_SUB,
    '<~'  => :OUT_EDGE_SUB,
    '<|'  => :LCOLLECT,
    '|>'  => :RCOLLECT,
    ';'   => :SEMIC,
    '?'   => :QMARK,
    '\\'  => :BACKSLASH,
    '=>'  => :FARROW,
    '+>'  => :PARROW,
    '+'   => :PLUS,
    '-'   => :MINUS,
    '/'   => :DIV,
    '*'   => :TIMES,
    '%'   => :MODULO,
    '<<'  => :LSHIFT,
    '>>'  => :RSHIFT,
    '=~'  => :MATCH,
    '!~'  => :NOMATCH,
    %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF,
    "<string>"  => :STRING,
    "<dqpre>"   => :DQPRE,
    "<dqmid>"   => :DQMID,
    "<dqpost>"  => :DQPOST,
    "<boolean>" => :BOOLEAN,
    "<lambda>"  => :LAMBDA, # A LBRACE followed by '|'
    "