require 'spec_helper'
require 'matchers/match_tokens2'
require 'puppet/pops'
require 'puppet/pops/parser/lexer2'

# Helpers shared by the Lexer2 examples below.
module EgrammarLexer2Spec
  # Lexes +s+ with Lexer2#fullscan and returns every token except the last
  # one (presumably the end-of-input marker -- confirm against Lexer2).
  def tokens_scanned_from(s)
    lexer = Puppet::Pops::Parser::Lexer2.new
    lexer.string = s
    lexer.fullscan[0..-2]
  end

  # Same as #tokens_scanned_from, but scans +s+ with Lexer2#fullscan_epp.
  def epp_tokens_scanned_from(s)
    lexer = Puppet::Pops::Parser::Lexer2.new
    lexer.string = s
    lexer.fullscan_epp[0..-2]
  end
end

describe 'Lexer2' do
  include EgrammarLexer2Spec

  # Operators and punctuation: token name => source text that must lex to it.
  {
    :LISTSTART => '[',
    :RBRACK => ']',
    :LBRACE => '{',
    :RBRACE => '}',
    :WSLPAREN => '(', # since it is first on a line it is special (LPAREN handled separately)
    :RPAREN => ')',
    :EQUALS => '=',
    :ISEQUAL => '==',
    :GREATEREQUAL => '>=',
    :GREATERTHAN => '>',
    :LESSTHAN => '<',
    :LESSEQUAL => '<=',
    :NOTEQUAL => '!=',
    :NOT => '!',
    :COMMA => ',',
    :DOT => '.',
    :COLON => ':',
    :AT => '@',
    :LLCOLLECT => '<<|',
    :RRCOLLECT => '|>>',
    :LCOLLECT => '<|',
    :RCOLLECT => '|>',
    :SEMIC => ';',
    :QMARK => '?',
    :OTHER => '\\',
    :FARROW => '=>',
    :PARROW => '+>',
    :APPENDS => '+=',
    :DELETES => '-=',
    :PLUS => '+',
    :MINUS => '-',
    :DIV => '/',
    :TIMES => '*',
    :LSHIFT => '<<',
    :RSHIFT => '>>',
    :MATCH => '=~',
    :NOMATCH => '!~',
    :IN_EDGE => '->',
    :OUT_EDGE => '<-',
    :IN_EDGE_SUB => '~>',
    :OUT_EDGE_SUB => '<~',
    :PIPE => '|',
  }.each do |name, string|
    it "should lex a token named #{name.to_s}" do
      expect(tokens_scanned_from(string)).to match_tokens2(name)
    end
  end

  it "should lex [ in position after non whitespace as LBRACK" do
    expect(tokens_scanned_from("a[")).to match_tokens2(:NAME, :LBRACK)
  end

  # Keywords: source text => token name.
  {
    "case" => :CASE,
    "class" => :CLASS,
    "default" => :DEFAULT,
    "define" => :DEFINE,
    # "import" => :IMPORT, # done as a function in egrammar
    "if" => :IF,
    "elsif" => :ELSIF,
    "else" => :ELSE,
    "inherits" => :INHERITS,
    "node" => :NODE,
    "and" => :AND,
    "or" => :OR,
    "undef" => :UNDEF,
    "false" => :BOOLEAN,
    "true" => :BOOLEAN,
    "in" => :IN,
    "unless" => :UNLESS,
    "private" => :PRIVATE,
    "type" => :TYPE,
    "attr" => :ATTR,
    "application" => :APPLICATION,
    "consumes" => :CONSUMES,
    "produces" => :PRODUCES,
    "site" => :SITE,
  }.each do |string, name|
    it "should lex a keyword from '#{string}'" do
      expect(tokens_scanned_from(string)).to match_tokens2(name)
    end
  end

  # TODO: Complete with all edge cases
  [ 'A', 'A::B', '::A', '::A::B',].each do |string|
    it "should lex a CLASSREF on the form '#{string}'" do
      expect(tokens_scanned_from(string)).to match_tokens2([:CLASSREF, string])
    end
  end

  # TODO: Complete with all edge cases
  [ 'a', 'a::b', '::a', '::a::b',].each do |string|
    it "should lex a NAME on the form '#{string}'" do
      expect(tokens_scanned_from(string)).to match_tokens2([:NAME, string])
    end
  end

  [ 'a-b', 'a--b', 'a-b-c', '_x'].each do |string|
    it "should lex a BARE WORD STRING on the form '#{string}'" do
      expect(tokens_scanned_from(string)).to match_tokens2([:WORD, string])
    end
  end

  [ '_x::y', 'x::_y'].each do |string|
    it "should consider the bare word '#{string}' to be a WORD" do
      expect(tokens_scanned_from(string)).to match_tokens2(:WORD)
    end
  end

  {
    '-a' => [:MINUS, :NAME],
    '--a' => [:MINUS, :MINUS, :NAME],
    'a-' => [:NAME, :MINUS],
    'a- b' => [:NAME, :MINUS, :NAME],
    'a--' => [:NAME, :MINUS, :MINUS],
    'a-$3' => [:NAME, :MINUS, :VARIABLE],
  }.each do |source, expected|
    it "should lex leading and trailing hyphens from #{source}" do
      expect(tokens_scanned_from(source)).to match_tokens2(*expected)
    end
  end

  { 'false'=>false, 'true'=>true}.each do |string, value|
    it "should lex a BOOLEAN on the form '#{string}'" do
      expect(tokens_scanned_from(string)).to match_tokens2([:BOOLEAN, value])
    end
  end

  [ '0', '1', '2982383139'].each do |string|
    it "should lex a decimal integer NUMBER on the form '#{string}'" do
      expect(tokens_scanned_from(string)).to match_tokens2([:NUMBER, string])
    end
  end

  { ' 1' => '1', '1 ' => '1', ' 1 ' => '1'}.each do |string, value|
    it "should lex a NUMBER with surrounding space '#{string}'" do
      expect(tokens_scanned_from(string)).to match_tokens2([:NUMBER, value])
    end
  end

  [ '0.0', '0.1', '0.2982383139', '29823.235', '10e23', '10e-23', '1.234e23'].each do |string|
    it "should lex a decimal floating point NUMBER on the form '#{string}'" do
      expect(tokens_scanned_from(string)).to match_tokens2([:NUMBER, string])
    end
  end

  [ '00', '01', '0123', '0777'].each do |string|
    it "should lex an octal integer NUMBER on the form '#{string}'" do
      expect(tokens_scanned_from(string)).to match_tokens2([:NUMBER, string])
    end
  end

  [ '0x0', '0x1', '0xa', '0xA', '0xabcdef', '0xABCDEF'].each do |string|
    it "should lex an hex integer NUMBER on the form '#{string}'" do
      expect(tokens_scanned_from(string)).to match_tokens2([:NUMBER, string])
    end
  end

  # Single quoted strings: Puppet source => expected token value.
  {
    "''" => '',
    "'a'" => 'a',
    "'a\\'b'" =>"a'b",
    "'a\\rb'" =>"a\\rb",
    "'a\\nb'" =>"a\\nb",
    "'a\\tb'" =>"a\\tb",
    "'a\\sb'" =>"a\\sb",
    "'a\\$b'" =>"a\\$b",
    "'a\\\"b'" =>"a\\\"b",
    "'a\\\\b'" =>"a\\b",
    "'a\\\\'" =>"a\\",
  }.each do |source, expected|
    it "should lex a single quoted STRING on the form #{source}" do
      expect(tokens_scanned_from(source)).to match_tokens2([:STRING, expected])
    end
  end

  # Puppet source => [expected token length, expected token value].
  {
    "''" => [2, ""],
    "'a'" => [3, "a"],
    "'a\\'b'" => [6, "a'b"],
  }.each do |source, expected|
    it "should lex a single quoted STRING on the form #{source} as having length #{expected[0]}" do
      length, value = expected
      expect(tokens_scanned_from(source)).to match_tokens2([:STRING, value, {:line => 1, :pos=>1, :length=> length}])
    end
  end

  {
    '""' => '',
    '"a"' => 'a',
    '"a\'b"' => "a'b",
  }.each do |source, expected|
    it "should lex a double quoted STRING on the form #{source}" do
      expect(tokens_scanned_from(source)).to match_tokens2([:STRING, expected])
    end
  end

  # Interpolation: Puppet source => expected tokens with their positions.
  {
    '"a$x b"' => [[:DQPRE, 'a', {:line => 1, :pos=>1, :length=>2 }],
                  [:VARIABLE, 'x', {:line => 1, :pos=>3, :length=>2 }],
                  [:DQPOST, ' b', {:line => 1, :pos=>5, :length=>3 }]],
    '"a$x.b"' => [[:DQPRE, 'a', {:line => 1, :pos=>1, :length=>2 }],
                  [:VARIABLE, 'x', {:line => 1, :pos=>3, :length=>2 }],
                  [:DQPOST, '.b', {:line => 1, :pos=>5, :length=>3 }]],
    '"$x.b"' => [[:DQPRE, '', {:line => 1, :pos=>1, :length=>1 }],
                 [:VARIABLE, 'x', {:line => 1, :pos=>2, :length=>2 }],
                 [:DQPOST, '.b', {:line => 1, :pos=>4, :length=>3 }]],
    '"a$x"' => [[:DQPRE, 'a', {:line => 1, :pos=>1, :length=>2 }],
                [:VARIABLE, 'x', {:line => 1, :pos=>3, :length=>2 }],
                [:DQPOST, '', {:line => 1, :pos=>5, :length=>1 }]],
    '"a${x}"' => [[:DQPRE, 'a', {:line => 1, :pos=>1, :length=>4 }],
                  [:VARIABLE, 'x', {:line => 1, :pos=>5, :length=>1 }],
                  [:DQPOST, '', {:line => 1, :pos=>7, :length=>1 }]],
    '"a${_x}"' => [[:DQPRE, 'a', {:line => 1, :pos=>1, :length=>4 }],
                   [:VARIABLE, '_x', {:line => 1, :pos=>5, :length=>2 }],
                   [:DQPOST, '', {:line => 1, :pos=>8, :length=>1 }]],
    '"a${y::_x}"' => [[:DQPRE, 'a', {:line => 1, :pos=>1, :length=>4 }],
                      [:VARIABLE, 'y::_x', {:line => 1, :pos=>5, :length=>5 }],
                      [:DQPOST, '', {:line => 1, :pos=>11, :length=>1 }]],
    '"a${_x[1]}"' => [[:DQPRE, 'a', {:line => 1, :pos=>1, :length=>4 }],
                      [:VARIABLE, '_x', {:line => 1, :pos=>5, :length=>2 }],
                      [:LBRACK, '[', {:line => 1, :pos=>7, :length=>1 }],
                      [:NUMBER, '1', {:line => 1, :pos=>8, :length=>1 }],
                      [:RBRACK, ']', {:line => 1, :pos=>9, :length=>1 }],
                      [:DQPOST, '', {:line => 1, :pos=>11, :length=>1 }]],
    '"a${_x.foo}"'=> [[:DQPRE, 'a', {:line => 1, :pos=>1, :length=>4 }],
                      [:VARIABLE, '_x', {:line => 1, :pos=>5, :length=>2 }],
                      [:DOT, '.', {:line => 1, :pos=>7, :length=>1 }],
                      [:NAME, 'foo', {:line => 1, :pos=>8, :length=>3 }],
                      [:DQPOST, '', {:line => 1, :pos=>12, :length=>1 }]],
  }.each do |source, expected|
    it "should lex an interpolated variable 'x' from #{source}" do
      expect(tokens_scanned_from(source)).to match_tokens2(*expected)
    end
  end

  {
    '"$"' => '$',
    '"a$"' => 'a$',
    '"a$%b"' => "a$%b",
    '"a$$"' => "a$$",
    '"a$$%"' => "a$$%",
  }.each do |source, expected|
    it "should lex interpolation including false starts #{source}" do
      expect(tokens_scanned_from(source)).to match_tokens2([:STRING, expected])
    end
  end

  it "differentiates between foo[x] and foo [x] (whitespace)" do
    expect(tokens_scanned_from("$a[1]")).to match_tokens2(:VARIABLE, :LBRACK, :NUMBER, :RBRACK)
    expect(tokens_scanned_from("$a [1]")).to match_tokens2(:VARIABLE, :LISTSTART, :NUMBER, :RBRACK)
    expect(tokens_scanned_from("a[1]")).to match_tokens2(:NAME, :LBRACK, :NUMBER, :RBRACK)
    expect(tokens_scanned_from("a [1]")).to match_tokens2(:NAME, :LISTSTART, :NUMBER, :RBRACK)
  end

  it "differentiates between '(' first on line, and not first on line" do
    expect(tokens_scanned_from("(")).to match_tokens2(:WSLPAREN)
    expect(tokens_scanned_from("\n(")).to match_tokens2(:WSLPAREN)
    expect(tokens_scanned_from("\n\r(")).to match_tokens2(:WSLPAREN)
    expect(tokens_scanned_from("\n\t(")).to match_tokens2(:WSLPAREN)
    expect(tokens_scanned_from("\n\r\t(")).to match_tokens2(:WSLPAREN)
    expect(tokens_scanned_from("\n\u00a0(")).to match_tokens2(:WSLPAREN)

    expect(tokens_scanned_from("x(")).to match_tokens2(:NAME, :LPAREN)
    expect(tokens_scanned_from("\nx(")).to match_tokens2(:NAME, :LPAREN)
    expect(tokens_scanned_from("\n\rx(")).to match_tokens2(:NAME, :LPAREN)
    expect(tokens_scanned_from("\n\tx(")).to match_tokens2(:NAME, :LPAREN)
    expect(tokens_scanned_from("\n\r\tx(")).to match_tokens2(:NAME, :LPAREN)
    expect(tokens_scanned_from("\n\u00a0x(")).to match_tokens2(:NAME, :LPAREN)

    expect(tokens_scanned_from("x (")).to match_tokens2(:NAME, :LPAREN)
    expect(tokens_scanned_from("x\t(")).to match_tokens2(:NAME, :LPAREN)
    expect(tokens_scanned_from("x\u00a0(")).to match_tokens2(:NAME, :LPAREN)
  end

  it "skips whitepsace" do
    expect(tokens_scanned_from(" if if if ")).to match_tokens2(:IF, :IF, :IF)
    expect(tokens_scanned_from(" if \n\r\t\nif if ")).to match_tokens2(:IF, :IF, :IF)
    expect(tokens_scanned_from(" if \n\r\t\n\u00a0if\u00a0 if ")).to match_tokens2(:IF, :IF, :IF)
  end

  it "skips single line comments" do
    expect(tokens_scanned_from("if # comment\nif")).to match_tokens2(:IF, :IF)
  end

  [
    "if /* comment */\nif",
    "if /* comment\n */\nif",
    "if /*\n comment\n */\nif",
  ].each do |source|
    it "skips multi line comments" do
      expect(tokens_scanned_from(source)).to match_tokens2(:IF, :IF)
    end
  end

  it 'detects unterminated multiline comment' do
    expect { tokens_scanned_from("/* not terminated\nmultiline\ncomment") }.to raise_error(Puppet::ParseErrorWithIssue) { |e|
      expect(e.issue_code).to be(Puppet::Pops::Issues::UNCLOSED_MLCOMMENT.issue_code)
    }
  end

  # Contexts in which "/./" must lex as REGEX:
  # description => [token(s) expected before the regexp, Puppet source].
  {
    "=~" => [:MATCH, "=~ /./"],
    "!~" => [:NOMATCH, "!~ /./"],
    "," => [:COMMA, ", /./"],
    "(" => [:WSLPAREN, "( /./"],
    "x (" => [[:NAME, :LPAREN], "x ( /./"],
    "x\\t (" => [[:NAME, :LPAREN], "x\t ( /./"],
    "[ (liststart)" => [:LISTSTART, "[ /./"],
    "[ (LBRACK)" => [[:NAME, :LBRACK], "a[ /./"],
    "[ (liststart after name)" => [[:NAME, :LISTSTART], "a [ /./"],
    "{" => [:LBRACE, "{ /./"],
    "+" => [:PLUS, "+ /./"],
    "-" => [:MINUS, "- /./"],
    "*" => [:TIMES, "* /./"],
    ";" => [:SEMIC, "; /./"],
  }.each do |token, entry|
    it "should lex regexp after '#{token}'" do
      expected = [entry[0], :REGEX].flatten
      expect(tokens_scanned_from(entry[1])).to match_tokens2(*expected)
    end
  end

  it "should lex a simple expression" do
    expect(tokens_scanned_from('1 + 1')).to match_tokens2([:NUMBER, '1'], :PLUS, [:NUMBER, '1'])
  end

  # Contexts in which a following "/" must lex as DIV rather than start a
  # regexp: description => [Puppet source, expected tokens].
  {
    "1" => ["1 /./", [:NUMBER, :DIV, :DOT, :DIV]],
    "'a'" => ["'a' /./", [:STRING, :DIV, :DOT, :DIV]],
    "true" => ["true /./", [:BOOLEAN, :DIV, :DOT, :DIV]],
    "false" => ["false /./", [:BOOLEAN, :DIV, :DOT, :DIV]],
    "/./" => ["/./ /./", [:REGEX, :DIV, :DOT, :DIV]],
    "a" => ["a /./", [:NAME, :DIV, :DOT, :DIV]],
    "A" => ["A /./", [:CLASSREF, :DIV, :DOT, :DIV]],
    ")" => [") /./", [:RPAREN, :DIV, :DOT, :DIV]],
    "]" => ["] /./", [:RBRACK, :DIV, :DOT, :DIV]],
    "|>" => ["|> /./", [:RCOLLECT, :DIV, :DOT, :DIV]],
    "|>>" => ["|>> /./", [:RRCOLLECT, :DIV, :DOT, :DIV]],
    "$x" => ["$x /1/", [:VARIABLE, :DIV, :NUMBER, :DIV]],
    "a-b" => ["a-b /1/", [:WORD, :DIV, :NUMBER, :DIV]],
    '"a$a"' => ['"a$a" /./', [:DQPRE, :VARIABLE, :DQPOST, :DIV, :DOT, :DIV]],
  }.each do |token, entry|
    it "should not lex regexp after '#{token}'" do
      expect(tokens_scanned_from(entry[ 0 ])).to match_tokens2(*entry[ 1 ])
    end
  end

  it 'should lex assignment' do
    expect(tokens_scanned_from("$a = 10")).to match_tokens2([:VARIABLE, "a"], :EQUALS, [:NUMBER, '10'])
  end

  # TODO: Tricky, and heredoc not supported yet
  # it "should not lex regexp after heredoc" do
  #   tokens_scanned_from("1 / /./").should match_tokens2(:NUMBER, :DIV, :REGEX)
  # end

  it "should lex regexp at beginning of input" do
    expect(tokens_scanned_from(" /./")).to match_tokens2(:REGEX)
  end

  it "should lex regexp right of div" do
    expect(tokens_scanned_from("1 / /./")).to match_tokens2(:NUMBER, :DIV, :REGEX)
  end

  it 'should lex regexp with escaped slash' do
    scanned = tokens_scanned_from('/\//')
    expect(scanned).to match_tokens2(:REGEX)
    expect(scanned[0][1][:value]).to eql(Regexp.new('/'))
  end

  it 'should lex regexp with escaped backslash' do
    scanned = tokens_scanned_from('/\\\\/')
    expect(scanned).to match_tokens2(:REGEX)
    expect(scanned[0][1][:value]).to eql(Regexp.new('\\\\'))
  end

  it 'should lex regexp with escaped backslash followed escaped slash ' do
    scanned = tokens_scanned_from('/\\\\\\//')
    expect(scanned).to match_tokens2(:REGEX)
    expect(scanned[0][1][:value]).to eql(Regexp.new('\\\\/'))
  end

  it 'should lex regexp with escaped slash followed escaped backslash ' do
    scanned = tokens_scanned_from('/\\/\\\\/')
    expect(scanned).to match_tokens2(:REGEX)
    expect(scanned[0][1][:value]).to eql(Regexp.new('/\\\\'))
  end

  it 'should not lex regexp with escaped ending slash' do
    expect(tokens_scanned_from('/\\/')).to match_tokens2(:DIV, :OTHER, :DIV)
  end

  it "should accept newline in a regular expression" do
    scanned = tokens_scanned_from("/\n.\n/")
    # Note that strange formatting here is important
    expect(scanned[0][1][:value]).to eql(/
.
/)
  end

  context 'when lexer lexes heredoc' do
    it 'lexes tag, syntax and escapes, margin and right trim' do
      code = <<-CODE
      @(END:syntax/t)
      Tex\\tt\\n
      |- END
      CODE
      expect(tokens_scanned_from(code)).to match_tokens2([:HEREDOC, 'syntax'], :SUBLOCATE, [:STRING, "Tex\tt\\n"])
    end

    it 'lexes "tag", syntax and escapes, margin, right trim and interpolation' do
      code = <<-CODE
      @("END":syntax/t)
      Tex\\tt\\n$var After
      |- END
      CODE
      expect(tokens_scanned_from(code)).to match_tokens2(
        [:HEREDOC, 'syntax'],
        :SUBLOCATE,
        [:DQPRE, "Tex\tt\\n"],
        [:VARIABLE, "var"],
        [:DQPOST, " After"]
      )
    end

    context 'with bad syntax' do
      # Expects lexing +code+ to raise a ParseErrorWithIssue carrying the
      # issue code of +issue+.
      def expect_issue(code, issue)
        expect { tokens_scanned_from(code) }.to raise_error(Puppet::ParseErrorWithIssue) { |e|
          expect(e.issue_code).to be(issue.issue_code)
        }
      end

      it 'detects and reports HEREDOC_UNCLOSED_PARENTHESIS' do
        code = <<-CODE
        @(END:syntax/t
        Text
        |- END
        CODE
        expect_issue(code, Puppet::Pops::Issues::HEREDOC_UNCLOSED_PARENTHESIS)
      end

      it 'detects and reports HEREDOC_WITHOUT_END_TAGGED_LINE' do
        code = <<-CODE
        @(END:syntax/t)
        Text
        CODE
        expect_issue(code, Puppet::Pops::Issues::HEREDOC_WITHOUT_END_TAGGED_LINE)
      end

      it 'detects and reports HEREDOC_INVALID_ESCAPE' do
        code = <<-CODE
        @(END:syntax/x)
        Text
        |- END
        CODE
        expect_issue(code, Puppet::Pops::Issues::HEREDOC_INVALID_ESCAPE)
      end

      it 'detects and reports HEREDOC_INVALID_SYNTAX' do
        code = <<-CODE
        @(END:syntax/t/p)
        Text
        |- END
        CODE
        expect_issue(code, Puppet::Pops::Issues::HEREDOC_INVALID_SYNTAX)
      end

      it 'detects and reports HEREDOC_WITHOUT_TEXT' do
        code = '@(END:syntax/t)'
        expect_issue(code, Puppet::Pops::Issues::HEREDOC_WITHOUT_TEXT)
      end

      it 'detects and reports HEREDOC_MULTIPLE_AT_ESCAPES' do
        code = <<-CODE
        @(END:syntax/tst)
        Tex\\tt\\n
        |- END
        CODE
        expect_issue(code, Puppet::Pops::Issues::HEREDOC_MULTIPLE_AT_ESCAPES)
      end
    end
  end

  context 'when not given multi byte characters' do
    it 'produces byte offsets for tokens' do
      # The heredoc body starts in column 0 so that the expected offsets
      # (0, 2 and 4) count from the very first byte of the input.
      code = <<-"CODE"
1 2\n3
      CODE
      expect(tokens_scanned_from(code)).to match_tokens2(
        [:NUMBER, '1', {:line => 1, :offset => 0, :length=>1}],
        [:NUMBER, '2', {:line => 1, :offset => 2, :length=>1}],
        [:NUMBER, '3', {:line => 2, :offset => 4, :length=>1}]
      )
    end
  end

  context 'when dealing with multi byte characters' do
    it 'should support unicode characters' do
      code = <<-CODE
      "x\\u2713y"
      CODE
      # >= Ruby 1.9.3 reports \u
      expect(tokens_scanned_from(code)).to match_tokens2([:STRING, "x\u2713y"])
    end

    it 'should support adjacent short form unicode characters' do
      code = <<-CODE
      "x\\u2713\\u2713y"
      CODE
      # >= Ruby 1.9.3 reports \u
      expect(tokens_scanned_from(code)).to match_tokens2([:STRING, "x\u2713\u2713y"])
    end

    it 'should support unicode characters in long form' do
      code = <<-CODE
      "x\\u{1f452}y"
      CODE
      expect(tokens_scanned_from(code)).to match_tokens2([:STRING, "x\u{1f452}y"])
    end

    it 'can escape the unicode escape' do
      code = <<-"CODE"
      "x\\\\u{1f452}y"
      CODE
      expect(tokens_scanned_from(code)).to match_tokens2([:STRING, "x\\u{1f452}y"])
    end

    it 'produces byte offsets that counts each byte in a comment' do
      code = <<-"CODE"
      # \u{0400}\na
      CODE
      # The input is stripped, so offsets count from the '#'.
      expect(tokens_scanned_from(code.strip)).to match_tokens2([:NAME, 'a', {:line => 2, :offset => 5, :length=>1}])
    end

    it 'produces byte offsets that counts each byte in value token' do
      code = <<-"CODE"
      '\u{0400}'\na
      CODE
      # The input is stripped, so offsets count from the opening quote.
      expect(tokens_scanned_from(code.strip)).to match_tokens2(
        [:STRING, "\u{400}", {:line => 1, :offset => 0, :length=>4}],
        [:NAME, 'a', {:line => 2, :offset => 5, :length=>1}]
      )
    end

    it 'should not select LISTSTART token when preceded by multibyte chars' do
      # This test is sensitive to the number of multibyte characters and position of the expressions
      # within the string - it is designed to fail if the position is calculated on the byte offset of the '['
      # instead of the char offset.
      #
      code = "$a = '\u00f6\u00fc\u00fc\u00fc\u00fc\u00e4\u00e4\u00f6\u00e4'\nnotify {'x': message => B['dkda'] }\n"
      expect(tokens_scanned_from(code)).to match_tokens2(
        :VARIABLE, :EQUALS, :STRING,
        [:NAME, 'notify'], :LBRACE,
        [:STRING, 'x'], :COLON,
        :NAME, :FARROW, :CLASSREF, :LBRACK, :STRING, :RBRACK,
        :RBRACE)
    end
  end

  # NOTE: whitespace inside the EPP heredocs below is significant. The leading
  # space on each body line, and the position of each line break, show up
  # verbatim in the expected RENDER_STRING values.
  context 'when lexing epp' do
    it 'epp can contain just text' do
      code = <<-CODE
 This is just text
      CODE
      expect(epp_tokens_scanned_from(code)).to match_tokens2(:EPP_START, [:RENDER_STRING, " This is just text\n"])
    end

    it 'epp can contain text with interpolated rendered expressions' do
      code = <<-CODE
 This is <%= $x %> just text
      CODE
      expect(epp_tokens_scanned_from(code)).to match_tokens2(
        :EPP_START,
        [:RENDER_STRING, " This is "],
        [:RENDER_EXPR, nil],
        [:VARIABLE, "x"],
        [:EPP_END, "%>"],
        [:RENDER_STRING, " just text\n"]
      )
    end

    it 'epp can contain text with trimmed interpolated rendered expressions' do
      code = <<-CODE
 This is <%= $x -%> just text
      CODE
      expect(epp_tokens_scanned_from(code)).to match_tokens2(
        :EPP_START,
        [:RENDER_STRING, " This is "],
        [:RENDER_EXPR, nil],
        [:VARIABLE, "x"],
        [:EPP_END_TRIM, "-%>"],
        [:RENDER_STRING, "just text\n"]
      )
    end

    it 'epp can contain text with expressions that are not rendered' do
      code = <<-CODE
 This is <% $x=10 %> just text
      CODE
      expect(epp_tokens_scanned_from(code)).to match_tokens2(
        :EPP_START,
        [:RENDER_STRING, " This is "],
        [:VARIABLE, "x"],
        :EQUALS,
        [:NUMBER, "10"],
        [:RENDER_STRING, " just text\n"]
      )
    end

    it 'epp can skip leading space in tail text' do
      code = <<-CODE
 This is <% $x=10 -%>
 just text
      CODE
      expect(epp_tokens_scanned_from(code)).to match_tokens2(
        :EPP_START,
        [:RENDER_STRING, " This is "],
        [:VARIABLE, "x"],
        :EQUALS,
        [:NUMBER, "10"],
        [:RENDER_STRING, " just text\n"]
      )
    end

    it 'epp can skip comments' do
      code = <<-CODE
 This is <% $x=10 -%>
 <%# This is an epp comment -%>
 just text
      CODE
      expect(epp_tokens_scanned_from(code)).to match_tokens2(
        :EPP_START,
        [:RENDER_STRING, " This is "],
        [:VARIABLE, "x"],
        :EQUALS,
        [:NUMBER, "10"],
        [:RENDER_STRING, " just text\n"]
      )
    end

    it 'epp comments strips left whitespace when preceding is right trim' do
      code = <<-CODE
 This is <% $x=10 -%>
 space-before-me-but-not-after
 <%# This is an epp comment %>
 just text
      CODE
      expect(epp_tokens_scanned_from(code)).to match_tokens2(
        :EPP_START,
        [:RENDER_STRING, " This is "],
        [:VARIABLE, "x"],
        :EQUALS,
        [:NUMBER, "10"],
        [:RENDER_STRING, " space-before-me-but-not-after\n just text\n"]
      )
    end

    it 'epp comments strips left whitespace on same line when preceding is not right trim' do
      code = <<-CODE
 This is <% $x=10 %>
 <%# This is an epp comment -%>
 just text
      CODE
      expect(epp_tokens_scanned_from(code)).to match_tokens2(
        :EPP_START,
        [:RENDER_STRING, " This is "],
        [:VARIABLE, "x"],
        :EQUALS,
        [:NUMBER, "10"],
        [:RENDER_STRING, "\n just text\n"]
      )
    end

    it 'epp can escape epp tags' do
      code = <<-CODE
 This is <% $x=10 -%>
 <%% this is escaped epp %%>
      CODE
      expect(epp_tokens_scanned_from(code)).to match_tokens2(
        :EPP_START,
        [:RENDER_STRING, " This is "],
        [:VARIABLE, "x"],
        :EQUALS,
        [:NUMBER, "10"],
        [:RENDER_STRING, " <% this is escaped epp %>\n"]
      )
    end

    context 'with bad epp syntax' do
      # Expects EPP-lexing +code+ to raise a ParseErrorWithIssue carrying the
      # issue code of +issue+.
      def expect_issue(code, issue)
        expect { epp_tokens_scanned_from(code) }.to raise_error(Puppet::ParseErrorWithIssue) { |e|
          expect(e.issue_code).to be(issue.issue_code)
        }
      end

      it 'detects and reports EPP_UNBALANCED_TAG' do
        expect_issue('<% asf', Puppet::Pops::Issues::EPP_UNBALANCED_TAG)
      end

      it 'detects and reports EPP_UNBALANCED_COMMENT' do
        expect_issue('<%# asf', Puppet::Pops::Issues::EPP_UNBALANCED_COMMENT)
      end

      it 'detects and reports EPP_UNBALANCED_EXPRESSION' do
        expect_issue('asf <%', Puppet::Pops::Issues::EPP_UNBALANCED_EXPRESSION)
      end
    end
  end

  context 'when parsing bad code' do
    # Expects lexing +code+ to raise a ParseErrorWithIssue carrying the issue
    # code of +issue+. Braces (not do/end) are used so that the block is
    # passed to raise_error itself rather than to `to`.
    def expect_issue(code, issue)
      expect { tokens_scanned_from(code) }.to raise_error(Puppet::ParseErrorWithIssue) { |e|
        expect(e.issue_code).to be(issue.issue_code)
      }
    end

    it 'detects and reports issue ILLEGAL_CLASS_REFERENCE' do
      expect_issue('A::3', Puppet::Pops::Issues::ILLEGAL_CLASS_REFERENCE)
    end

    it 'detects and reports issue ILLEGAL_FULLY_QUALIFIED_CLASS_REFERENCE' do
      expect_issue('::A::3', Puppet::Pops::Issues::ILLEGAL_FULLY_QUALIFIED_CLASS_REFERENCE)
    end

    it 'detects and reports issue ILLEGAL_FULLY_QUALIFIED_NAME' do
      expect_issue('::a::3', Puppet::Pops::Issues::ILLEGAL_FULLY_QUALIFIED_NAME)
    end

    it 'detects and reports issue ILLEGAL_NUMBER' do
      expect_issue('3g', Puppet::Pops::Issues::ILLEGAL_NUMBER)
    end

    it 'detects and reports issue INVALID_HEX_NUMBER' do
      expect_issue('0x3g', Puppet::Pops::Issues::INVALID_HEX_NUMBER)
    end

    it 'detects and reports issue INVALID_OCTAL_NUMBER' do
      expect_issue('038', Puppet::Pops::Issues::INVALID_OCTAL_NUMBER)
    end

    it 'detects and reports issue INVALID_DECIMAL_NUMBER' do
      expect_issue('4.3g', Puppet::Pops::Issues::INVALID_DECIMAL_NUMBER)
    end

    it 'detects and reports issue NO_INPUT_TO_LEXER' do
      expect { Puppet::Pops::Parser::Lexer2.new.fullscan }.to raise_error(Puppet::ParseErrorWithIssue) { |e|
        expect(e.issue_code).to be(Puppet::Pops::Issues::NO_INPUT_TO_LEXER.issue_code)
      }
    end

    it 'detects and reports issue UNCLOSED_QUOTE' do
      expect_issue('"asd', Puppet::Pops::Issues::UNCLOSED_QUOTE)
    end
  end

  context 'when dealing with non UTF-8 and Byte Order Marks (BOMs)' do
    # BOM name => the bytes placed at the beginning of the input.
    {
      'UTF_8' => [0xEF, 0xBB, 0xBF],
      'UTF_16_1' => [0xFE, 0xFF],
      'UTF_16_2' => [0xFF, 0xFE],
      'UTF_32_1' => [0x00, 0x00, 0xFE, 0xFF],
      'UTF_32_2' => [0xFF, 0xFE, 0x00, 0x00],
      'UTF_1' => [0xF7, 0x64, 0x4C],
      'UTF_EBCDIC' => [0xDD, 0x73, 0x66, 0x73],
      'SCSU' => [0x0E, 0xFE, 0xFF],
      'BOCU' => [0xFB, 0xEE, 0x28],
      'GB_18030' => [0x84, 0x31, 0x95, 0x33]
    }.each do |key, bytes|
      # Values shared by both examples generated for this BOM.
      hex = bytes.map { |b| '%X' % b }.join(' ')
      # Keeps at most the first two '_'-separated parts of the key, e.g.
      # 'UTF_16_1' => 'UTF-16', 'UTF_EBCDIC' => 'UTF-EBCDIC', 'SCSU' => 'SCSU'.
      format_name = key.split('_')[0,2].join('-')
      fix = " - remove these from the puppet source"

      it "errors on the byte order mark for #{key} '[#{hex}]'" do
        bytes_str = "\\[#{hex}\\]"
        expect {
          tokens_scanned_from(bytes.pack('C*'))
        }.to raise_error(Puppet::ParseErrorWithIssue,
                         /Illegal #{format_name} .* at beginning of input: #{bytes_str}#{fix}/)
      end

      it "can use a possibly 'broken' UTF-16 string without problems for #{key}" do
        string = bytes.pack('C*').force_encoding('UTF-16')
        bytes_str = "\\[#{string.bytes.map {|b| '%X' % b}.join(' ')}\\]"
        expect {
          tokens_scanned_from(string)
        }.to raise_error(Puppet::ParseErrorWithIssue,
                         /Illegal #{format_name} .* at beginning of input: #{bytes_str}#{fix}/)
      end
    end
  end
end

describe Puppet::Pops::Parser::Lexer2 do
  include PuppetSpec::Files

  # First line of Rune version of Rune poem at http://www.columbia.edu/~fdc/utf8/
  # characters chosen since they will not parse on Windows with codepage 437 or 1252
  # Section 3.2.1.3 of Ruby spec guarantees that \u strings are encoded as UTF-8
  # Runes (may show up as garbage if font is not available): ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ
  #
  # The segments are joined with an explicit `+`: string literals on separate
  # lines without a continuation would be separate statements, and the block
  # would then return only the last segment.
  let(:rune_utf8) do
    "\u16A0\u16C7\u16BB\u16EB\u16D2\u16E6\u16A6\u16EB\u16A0\u16B1\u16A9\u16A0\u16A2" +
      "\u16B1\u16EB\u16A0\u16C1\u16B1\u16AA\u16EB\u16B7\u16D6\u16BB\u16B9\u16E6\u16DA" +
      "\u16B3\u16A2\u16D7"
  end

  context 'when lexing files from disk' do
    it 'should always read files as UTF-8' do
      if Puppet.features.microsoft_windows? && Encoding.default_external == Encoding::UTF_8
        raise 'This test must be run in a codepage other than 65001 to validate behavior'
      end

      manifest_code = "notify { '#{rune_utf8}': }"
      manifest = file_containing('manifest.pp', manifest_code)
      lexed_file = described_class.new.lex_file(manifest)

      expect(lexed_file.string.encoding).to eq(Encoding::UTF_8)
      expect(lexed_file.string).to eq(manifest_code)
    end

    it 'currently errors when the UTF-8 BOM (Byte Order Mark) is present when lexing files' do
      bom = "\uFEFF"
      manifest_code = "#{bom}notify { '#{rune_utf8}': }"
      manifest = file_containing('manifest.pp', manifest_code)

      expect {
        described_class.new.lex_file(manifest)
      }.to raise_error(Puppet::ParseErrorWithIssue,
                       'Illegal UTF-8 Byte Order mark at beginning of input: [EF BB BF] - remove these from the puppet source')
    end
  end
end