require 'strscan'

class Mustache
  # The Parser is responsible for taking a string template and
  # converting it into an array of tokens and, really, expressions. It
  # raises SyntaxError if there is anything it doesn't understand and
  # knows which sigil corresponds to which tag type.
  #
  # For example, given this template:
  #
  #   Hi {{thing}}!
  #
  # Run through the Parser we'll get tokens along these lines (the fetch
  # expression and the [lineno, column] offset carried by each mustache
  # token are elided here for brevity):
  #
  #   [:multi,
  #     [:static, "Hi "],
  #     [:mustache, :etag, "thing"],
  #     [:static, "!\n"]]
  #
  # You can see the array of tokens for any template with the
  # mustache(1) command line tool:
  #
  #   $ mustache --tokens test.mustache
  #   [:multi, [:static, "Hi "], [:mustache, :etag, "thing"], [:static, "!\n"]]
  class Parser
    # A SyntaxError is raised when the Parser comes across unclosed
    # tags, sections, illegal content in tags, or anything of that
    # sort.
    class SyntaxError < StandardError
      # message  - A String naming the problem.
      # position - An Array whose first three elements are the line number,
      #            the column and the text of the offending line (see
      #            Parser#position). Extra elements are ignored.
      def initialize(message, position)
        @message = message
        @lineno, @column, @line, _ = position
        @stripped_line = @line.strip
        # Shift the column left by however much leading whitespace was
        # stripped so the caret still points at the same character.
        @stripped_column = @column - (@line.size - @line.lstrip.size)
      end

      # Renders the message, the line number, the offending line and a caret
      # marking the column.
      def to_s
        <<-EOF
#{@message}
  Line #{@lineno}
    #{@stripped_line}
    #{' ' * @stripped_column}^
EOF
      end
    end

    # The sigil types which are valid after an opening `{{`
    VALID_TYPES = [ '#', '^', '/', '=', '!', '<', '>', '&', '{' ].map(&:freeze)

    # Returns a Regexp matching any one of VALID_TYPES. The Regexp is
    # memoized; add_type resets it whenever a new sigil is registered.
    def self.valid_types
      @valid_types ||= Regexp.new(VALID_TYPES.map { |t| Regexp.escape(t) }.join('|') )
    end

    # Add a supported sigil type (with optional aliases) to the Parser.
    #
    # Requires a block, which will be sent the following parameters:
    #
    # * content - The raw content of the tag
    # * fetch - A mustache context fetch expression for the content
    # * padding - Indentation whitespace from the currently-parsed line
    # * pre_match_position - Location of the scanner before a match was made
    #
    # The provided block will be evaluated against the current instance of
    # Parser, and may append to the Parser's @result as needed.
    def self.add_type(*types, &block)
      types = types.map(&:to_s)
      type, *aliases = types
      method_name = "scan_tag_#{type}".to_sym
      define_method(method_name, &block)
      aliases.each { |a| alias_method "scan_tag_#{a}", method_name }
      types.each { |t| VALID_TYPES << t unless VALID_TYPES.include?(t) }
      # Drop the memoized Regexp so valid_types picks up the new sigils.
      @valid_types = nil
    end

    # After these types of tags, all whitespace until the end of the line will
    # be skipped if they are the first (and only) non-whitespace content on
    # the line.
    SKIP_WHITESPACE = [ '#', '^', '/', '<', '>', '=', '!' ].map(&:freeze)

    # The content allowed in a tag name.
    ALLOWED_CONTENT = /(\w|[?!\/.-])*/

    # These types of tags allow any content,
    # the rest only allow ALLOWED_CONTENT.
    ANY_CONTENT = [ '!', '=' ].map(&:freeze)

    attr_reader :otag, :ctag

    # Accepts an options hash. Two keys are read here:
    #
    # * :inline_partials_at_compile_time - when truthy, partial tags are
    #   compiled in place instead of being emitted as :partial tokens.
    # * :partial_resolver - required when inlining is enabled; must respond
    #   to `call` and is invoked with the partial's name.
    #
    # Raises ArgumentError if inlining is enabled without a callable
    # resolver.
    def initialize(options = {})
      @options = options
      @option_inline_partials_at_compile_time = options[:inline_partials_at_compile_time]
      if @option_inline_partials_at_compile_time
        @partial_resolver = options[:partial_resolver]
        raise ArgumentError, "Missing or invalid partial_resolver" unless @partial_resolver.respond_to? :call
      end

      # Initialize default tags
      self.otag ||= '{{'
      self.ctag ||= '}}'
    end

    # The opening tag delimiter. This may be changed at runtime.
    def otag=(value)
      regex = regexp value
      # Used when scanning at the current position: captures any leading
      # spaces or tabs as padding.
      @otag_regex = /([ \t]*)?#{regex}/
      # Used when searching ahead through static text: only whitespace that
      # begins a line is treated as part of the match.
      @otag_not_regex = /(^[ \t]*)?#{regex}/
      @otag = value
    end

    # The closing tag delimiter. This too may be changed at runtime.
    def ctag=(value)
      @ctag_regex = regexp value
      @ctag = value
    end

    # Given a string template, returns an array of tokens.
    #
    # Raises SyntaxError if the template contains an unclosed tag or
    # section, or illegal tag content.
    def compile(template)
      @encoding = nil

      if template.respond_to?(:encoding)
        # Scan a binary copy and remember the original encoding so static
        # text can be re-tagged with it in scan_text.
        @encoding = template.encoding
        template = template.dup.force_encoding("BINARY")
      end

      # Keeps information about opened sections.
      @sections = []
      @result = [:multi]
      @scanner = StringScanner.new(template)

      # Scan until the end of the template.
      until @scanner.eos?
        scan_tags || scan_text
      end

      unless @sections.empty?
        # We have parsed the whole file, but there's still opened sections.
        type, pos, _ = @sections.pop
        error "Unclosed section #{type.inspect}", pos
      end

      @result
    end

    private

    # Scans the content of the tag at the current scanner position.
    #
    # ANY_CONTENT tags are read up to (but excluding) their closing
    # delimiter, and yield nil when that delimiter never appears. Every
    # other tag is restricted to ALLOWED_CONTENT, which may match the empty
    # string.
    def content_tags(type, current_ctag_regex)
      if ANY_CONTENT.include?(type)
        r = /\s*#{regexp(type)}?#{current_ctag_regex}/
        scan_until_exclusive(r)
      else
        @scanner.scan(ALLOWED_CONTENT)
      end
    end

    # Invokes the scan_tag_* handler registered for the given sigil. A nil
    # type dispatches to `scan_tag_`, the plain-variable handler.
    def dispatch_based_on_type(type, content, fetch, padding, pre_match_position)
      send("scan_tag_#{type}", content, fetch, padding, pre_match_position)
    end

    # Consumes the closing delimiter, raising SyntaxError if it is missing.
    def find_closing_tag(scanner, current_ctag_regex)
      error "Unclosed tag" unless scanner.scan(current_ctag_regex)
    end

    # Find {{mustaches}} and add them to the @result array.
    #
    # Always returns nil, so `compile` goes on to call scan_text after
    # every attempt, whether or not a tag was consumed.
    def scan_tags
      # Scan until we hit an opening delimiter.
      start_of_line = @scanner.beginning_of_line?
      pre_match_position = @scanner.pos
      last_index = @result.length

      return unless @scanner.scan @otag_regex
      padding = @scanner[1] || ''

      # Don't touch the preceding whitespace unless we're matching the start
      # of a new line.
      unless start_of_line
        @result << [:static, padding] unless padding.empty?
        pre_match_position += padding.length
        padding = ''
      end

      # Since {{= rewrites ctag, we store the ctag which should be used
      # when parsing this specific tag.
      current_ctag_regex = @ctag_regex
      type = @scanner.scan(self.class.valid_types)
      @scanner.skip(/\s*/)

      # ANY_CONTENT tags allow any character inside of them, while
      # other tags (such as variables) are more strict.
      content = content_tags(type, current_ctag_regex)

      # An ANY_CONTENT tag (comment or delimiter change) whose closing
      # delimiter never shows up yields nil; report it as a syntax error
      # rather than failing on `nil.empty?` below.
      error "Unclosed tag" if content.nil?

      # We found {{ but we can't figure out what's going on inside.
      error "Illegal content in tag" if content.empty?

      fetch = [:mustache, :fetch, content.split('.')]
      # Section handlers swap @result for the section's inner block, so keep
      # a handle on the array this tag was appended to.
      prev = @result

      dispatch_based_on_type(type, content, fetch, padding, pre_match_position)

      # The closing } in unescaped tags is just a hack for
      # aesthetics.
      type = "}" if type == "{"

      # Skip whitespace and any balancing sigils after the content
      # inside this tag.
      @scanner.skip(/\s+/)
      @scanner.skip(regexp(type)) if type

      find_closing_tag(@scanner, current_ctag_regex)

      # If this tag was the only non-whitespace content on this line, strip
      # the remaining whitespace.  If not, but we've been hanging on to padding
      # from the beginning of the line, re-insert the padding as static text.
      if start_of_line && !@scanner.eos?
        if @scanner.peek(2) =~ /\r?\n/ && SKIP_WHITESPACE.include?(type)
          @scanner.skip(/\r?\n/)
        else
          prev.insert(last_index, [:static, padding]) unless padding.empty?
        end
      end

      # Store off the current scanner position now that we've closed the tag
      # and consumed any irrelevant whitespace.
      @sections.last[1] << @scanner.pos unless @sections.empty?

      # Explicit nil: see the method comment.
      nil
    end

    # Try to find static text, e.g. raw HTML with no {{mustaches}}.
    def scan_text
      text = scan_until_exclusive @otag_not_regex

      if text.nil?
        # Couldn't find any otag, which means the rest is just static text.
        text = @scanner.rest
        # Mark as done.
        @scanner.terminate
      end

      # Restore the encoding the template had before it was forced to binary.
      text.force_encoding(@encoding) if @encoding

      @result << [:static, text] unless text.empty?
    end

    # Scans the string until the pattern is matched. Returns the substring
    # *excluding* the end of the match, advancing the scan pointer to that
    # location. If there is no match, nil is returned.
    def scan_until_exclusive(regexp)
      pos = @scanner.pos
      if @scanner.scan_until(regexp)
        # Rewind over the match itself so it is left for the next scan.
        @scanner.pos -= @scanner.matched.size
        @scanner.pre_match[pos..-1]
      end
    end

    # Returns the [lineno, column] pair of the current position.
    def offset
      position[0, 2]
    end

    # Returns [lineno, column, line]
    def position
      # The rest of the current line
      rest = @scanner.check_until(/\n|\Z/).to_s.chomp

      # What we have parsed so far
      parsed = @scanner.string[0...@scanner.pos]

      lines = parsed.split("\n")

      [ lines.size, lines.last.size - 1, lines.last + rest ]
    end

    # Used to quickly convert a string into a regular expression
    # usable by the string scanner. Returns nil when given nil.
    def regexp(thing)
      Regexp.new Regexp.escape(thing) if thing
    end

    # Raises a SyntaxError. The message should be the name of the
    # error - other details such as line number and position are
    # handled for you.
    def error(message, pos = position)
      raise SyntaxError.new(message, pos)
    end

    #
    # Scan tags
    #
    # These methods are called in `scan_tags`. Because they contain nonstandard
    # characters in their method names, they are aliased to
    # better named methods.
    #

    # This function handles the cases where the scanned tag does not have
    # a type.
    def scan_tag_(content, fetch, padding, pre_match_position)
      @result << [:mustache, :etag, fetch, offset]
    end

    # Opens a section: appends a :section token and makes its inner block
    # the current @result until the matching close tag is seen.
    def scan_tag_block(content, fetch, padding, pre_match_position)
      block = [:multi]
      @result << [:mustache, :section, fetch, offset, block]
      @sections << [content, position, @result]
      @result = block
    end
    alias_method :'scan_tag_#', :scan_tag_block

    # Opens an inverted section; same bookkeeping as scan_tag_block but
    # emits an :inverted_section token.
    def scan_tag_inverted(content, fetch, padding, pre_match_position)
      block = [:multi]
      @result << [:mustache, :inverted_section, fetch, offset, block]
      @sections << [content, position, @result]
      @result = block
    end
    alias_method :'scan_tag_^', :scan_tag_inverted

    # Closes the innermost open section, appending the section's raw source
    # text and the delimiters in effect to its token.
    #
    # Raises SyntaxError if no section is open or if the names do not match.
    def scan_tag_close(content, fetch, padding, pre_match_position)
      section, pos, result = @sections.pop
      if section.nil?
        error "Closing unopened #{content.inspect}"
      end

      # pos[3] is the scanner position recorded by scan_tags just after the
      # opening tag was closed.
      raw = @scanner.pre_match[pos[3]...pre_match_position] + padding
      (@result = result).last << raw << [self.otag, self.ctag]

      if section != content
        error "Unclosed section #{section.inspect}", pos
      end
    end
    alias_method :'scan_tag_/', :scan_tag_close

    # Comments produce no tokens.
    def scan_tag_comment(content, fetch, padding, pre_match_position)
    end
    alias_method :'scan_tag_!', :scan_tag_comment

    # Switches the tag delimiters; content is "<otag> <ctag>".
    def scan_tag_delimiter(content, fetch, padding, pre_match_position)
      self.otag, self.ctag = content.split(' ', 2)
    end
    alias_method :'scan_tag_=', :scan_tag_delimiter

    # Handles partial tags. With inlining enabled the partial is resolved,
    # indented by the tag's padding and compiled in place by a fresh Parser;
    # otherwise a :partial token is emitted.
    def scan_tag_open_partial(content, fetch, padding, pre_match_position)
      @result << if @option_inline_partials_at_compile_time
        partial = @partial_resolver.call content
        # Indent a copy: the resolver's string may be frozen or cached, so it
        # must not be modified in place.
        partial = partial.gsub(/^/, padding) unless padding.empty?
        self.class.new(@options).compile partial
      else
        [:mustache, :partial, content, offset, padding]
      end
    end
    alias_method :'scan_tag_<', :scan_tag_open_partial
    alias_method :'scan_tag_>', :scan_tag_open_partial

    # Emits an unescaped-variable (:utag) token.
    def scan_tag_unescaped(content, fetch, padding, pre_match_position)
      @result << [:mustache, :utag, fetch, offset]
    end
    alias_method :'scan_tag_{', :'scan_tag_unescaped'
    alias_method :'scan_tag_&', :'scan_tag_unescaped'
  end
end