module Puppet::Pops module Parser # This module is an integral part of the Lexer. # It handles scanning of EPP (Embedded Puppet), a form of string/expression interpolation similar to ERB. # require 'strscan' module EppSupport TOKEN_RENDER_STRING = [:RENDER_STRING, nil, 0] TOKEN_RENDER_EXPR = [:RENDER_EXPR, nil, 0] # Scans all of the content and returns it in an array # Note that the terminating [false, false] token is included in the result. # def fullscan_epp result = [] scan_epp {|token, value| result.push([token, value]) } result end # A block must be passed to scan. It will be called with two arguments, a symbol for the token, # and an instance of LexerSupport::TokenValue # PERFORMANCE NOTE: The TokenValue is designed to reduce the amount of garbage / temporary data # and to only convert the lexer's internal tokens on demand. It is slightly more costly to create an # instance of a class defined in Ruby than an Array or Hash, but the gain is much bigger since transformation # logic is avoided for many of its members (most are never used (e.g. line/pos information which is only of # value in general for error messages, and for some expressions (which the lexer does not know about). # def scan_epp # PERFORMANCE note: it is faster to access local variables than instance variables. # This makes a small but notable difference since instance member access is avoided for # every token in the lexed content. # scn = @scanner ctx = @lexing_context queue = @token_queue lex_error(Issues::EPP_INTERNAL_ERROR, :error => 'No string or file given to lexer to process.') unless scn ctx[:epp_mode] = :text enqueue_completed([:EPP_START, nil, 0], 0) interpolate_epp # This is the lexer's main loop until queue.empty? && scn.eos? do if token = queue.shift || lex_token yield [ ctx[:after] = token[0], token[1] ] end end if ctx[:epp_open_position] lex_error(Issues::EPP_UNBALANCED_TAG, {}, ctx[:epp_position]) end # Signals end of input yield [false, false] end def interpolate_epp(skip_leading=false) scn = @scanner ctx = @lexing_context eppscanner = EppScanner.new(scn) before = scn.pos s = eppscanner.scan(skip_leading) case eppscanner.mode when :text # Should be at end of scan, or something is terribly wrong unless @scanner.eos? lex_error(Issues::EPP_INTERNAL_ERROR, :error => 'template scanner returns text mode and is not and end of input') end if s # s may be nil if scanned text ends with an epp tag (i.e. no trailing text). enqueue_completed([:RENDER_STRING, s, scn.pos - before], before) end ctx[:epp_open_position] = nil # do nothing else, scanner is at the end when :error lex_error(eppscanner.issue) when :epp # It is meaningless to render empty string segments, and it is harmful to do this at # the start of the scan as it prevents specification of parameters with <%- ($x, $y) -%> # if s && s.length > 0 enqueue_completed([:RENDER_STRING, s, scn.pos - before], before) end # switch epp_mode to general (embedded) pp logic (non rendered result) ctx[:epp_mode] = :epp ctx[:epp_open_position] = scn.pos when :expr # It is meaningless to render an empty string segment if s && s.length > 0 enqueue_completed([:RENDER_STRING, s, scn.pos - before], before) end enqueue_completed(TOKEN_RENDER_EXPR, before) # switch mode to "epp expr interpolation" ctx[:epp_mode] = :expr ctx[:epp_open_position] = scn.pos else lex_error(Issues::EPP_INTERNAL_ERROR, :error => "Unknown mode #{eppscanner.mode} returned by template scanner") end nil end # A scanner specialized in processing text with embedded EPP (Embedded Puppet) tags. # The scanner is initialized with a StringScanner which it mutates as scanning takes place. # The intent is to use one instance of EppScanner per wanted scan, and this instance represents # the state after the scan. # # @example Sample usage # a = "some text <% pp code %> some more text" # scan = StringScanner.new(a) # eppscan = EppScanner.new(scan) # str = eppscan.scan # eppscan.mode # => :epp # eppscan.lines # => 0 # eppscan # # The scanner supports # * scanning text until <%, <%-, <%= # * while scanning text: # * tokens <%% and %%> are translated to <% and %> respetively and is returned as text. # * tokens <%# and %> (or ending with -%>) and the enclosed text is a comment and is not included in the returned text # * text following a comment that ends with -%> gets trailing whitespace (up to and including a line break) trimmed # and this whitespace is not included in the returned text. # * The continuation {#mode} is set to one of: # * `:epp` - for a <% token # * `:expr` - for a <%= token # * `:text` - when there was no continuation mode (e.g. when input ends with text) # * ':error` - if the tokens are unbalanced (reaching the end without a closing matching token). An error message # is then also available via the method {#message}. # # Note that the intent is to use this specialized scanner to scan the text parts, when continuation mode is `:epp` or `:expr` # the pp lexer should advance scanning (using the string scanner) until it reaches and consumes a `-%>` or '%>ยด token. If it # finds a `-%> token it should pass this on as a `skip_leading` parameter when it performs the next {#scan}. # class EppScanner # The original scanner used by the lexer/container using EppScanner attr_reader :scanner # The resulting mode after the scan. # The mode is one of `:text` (the initial mode), `:epp` embedded code (no output), `:expr` (embedded # expression), or `:error` # attr_reader :mode # An error issue if `mode == :error`, `nil` otherwise. attr_reader :issue # If the first scan should skip leading whitespace (typically detected by the pp lexer when the # pp mode end-token is found (i.e. `-%>`) and then passed on to the scanner. # attr_reader :skip_leading # Creates an EppScanner based on a StringScanner that represents the state where EppScanner should start scanning. # The given scanner will be mutated (i.e. position moved) to reflect the EppScanner's end state after a scan. # def initialize(scanner) @scanner = scanner end # Here for backwards compatibility. # @deprecated Use issue instead # @return [String] the issue message def message @issue.nil? ? nil : @issue.format end # Scans from the current position in the configured scanner, advances this scanner's position until the end # of the input, or to the first position after a mode switching token (`<%`, `<%-` or `<%=`). Number of processed # lines and continuation mode can be obtained via {#lines}, and {#mode}. # # @return [String, nil] the scanned and processed text, or nil if at the end of the input. # def scan(skip_leading=false) @mode = :text @skip_leading = skip_leading return nil if scanner.eos? s = "" until scanner.eos? part = @scanner.scan_until(/(<%)|\z/) if @skip_leading part.sub!(/^[ \t]*\r?(?:\n|\z)?/,'') @skip_leading = false end # The spec for %%> is to transform it into a literal %>. This is done here, as %%> otherwise would go # undetected in text mode. (i.e. it is not really necessary to escape %> with %%> in text mode unless # adding checks stating that a literal %> is illegal in text (unbalanced). # part.gsub!(/%%>/, '%>') s += part case @scanner.peek(1) when "" # at the end # if s ends with <% then this is an error (unbalanced <% %>) if s.end_with? "<%" @mode = :error @issue = Issues::EPP_UNBALANCED_EXPRESSION end return s when "-" # trim trailing whitespace on same line from accumulated s # return text and signal switch to pp mode @scanner.getch # drop the - s.sub!(/[ \t]*<%\z/, '') @mode = :epp return s when "%" # verbatim text # keep the scanned <%, and continue scanning after skipping one % # (i.e. do nothing here) @scanner.getch # drop the % to get a literal <% in the output when "=" # expression # return text and signal switch to expression mode # drop the scanned <%, and skip past -%>, or %>, but also skip %%> @scanner.getch # drop the = s.slice!(-2..-1) @mode = :expr return s when "#" # template comment # drop the scanned <%, and skip past -%>, or %>, but also skip %%> s.slice!(-2..-1) # unless there is an immediate termination i.e. <%#%> scan for the next %> that is not # preceded by a % (i.e. skip %%>) part = scanner.scan_until(/[^%]%>/) unless part @issue = Issues::EPP_UNBALANCED_COMMENT @mode = :error return s end # Always trim leading whitespace on the same line when there is a comment s.sub!(/[ \t]*\z/, '') @skip_leading = true if part.end_with?("-%>") # Continue scanning for more text else # Switch to pp after having removed the <% s.slice!(-2..-1) @mode = :epp return s end end end end end end end