template.rb in htx-0.0.8

- old
+ new

@@ -2,44 +2,37 @@
 
 require('nokogiri')
 
 module HTX
   class Template
-    ELEMENT   = 0b001
-    CHILDLESS = 0b010
-    XMLNS     = 0b100
+    ELEMENT   = 1 << 0
+    CHILDLESS = 1 << 1
+    XMLNS     = 1 << 2
     FLAG_BITS = 3
 
     INDENT_DEFAULT = '  '
     CONTENT_TAG = 'htx-content'
     DYNAMIC_KEY_ATTR = 'htx-key'
-
     DEFAULT_XMLNS = {
       'math' => 'http://www.w3.org/1998/Math/MathML',
       'svg' => 'http://www.w3.org/2000/svg',
     }.freeze
 
-    LEADING_WHITESPACE = /\A[ \t]*\n[ \t]*/.freeze
-    TRAILING_WHITESPACE = /\n[ \t]*\z/.freeze
-    NON_BLANK_NON_FIRST_LINE = /(?<=\n)[ \t]*(?=\S)/.freeze
-    NEWLINE_NON_BLANK = /\n(?=[^\n])/.freeze
-    INDENT_GUESS = /^[ \t]+/.freeze
+    # Validates a user-supplied indent string: only spaces or only tabs. Whole-string anchors (\A, \z) are
+    # used so that a multi-line value cannot pass validation on the strength of a single line.
+    INDENT_VALID = /\A( +|\t+)\z/.freeze
+    # Matches the leading run of spaces or tabs on a line when it is immediately followed by a
+    # non-whitespace character. The per-line anchor is intentional: this is searched within a whole template.
+    INDENT_GUESS = /^( +|\t+)(?=\S)/.freeze
+    # Matches a newline that is followed by a character other than a newline.
+    INDENT_REGEX = /\n(?=[^\n])/.freeze
 
-    END_STATEMENT_END = /(;|\n|\{|\})[ \t]*\z/.freeze
-    BEGIN_STATEMENT_END = /\A[ \t]*(;|\{|\n|\})/.freeze
-    END_WHITESPACE = /\s\z/.freeze
-    BEGIN_WHITESPACE = /\A\s/.freeze
+    NO_SEMICOLON_BEGIN = /\A\s*[\n;}]/.freeze
+    NO_SEMICOLON_END = /(\A|[\n;{}][^\S\n]*)\z/.freeze
 
-    RAW_VALUE = /\A\s*\${([\S\s]*)}\s*\z/.freeze
-    TEMPLATE_STRING = /\A\s*`([\S\s]*)`\s*\z/.freeze
-    INTERPOLATION = /\$\\?{([^}]*})?/.freeze
-    HTML_ENTITY = /&([a-zA-Z]+|#\d+|x[0-9a-fA-F]+);/.freeze
-    NON_CONTROL_STATEMENT = /#{INTERPOLATION}|(#{HTML_ENTITY})/.freeze
-    CONTROL_STATEMENT = /[{}();]/.freeze
-    UNESCAPED_BACKTICK = /(?<!\\)((\\\\)*)`/.freeze
-    CLOSE_STATEMENT = /;?\s*htx\.close\((\d*)\);?(\s*)\z/.freeze
+    NEWLINE_BEGIN = /\A\s*\n/.freeze
+    NEWLINE_END = /\n[^\S\n]*\z/.freeze
+    NEWLINE_END_OPTIONAL = /\n?[^\S\n]*\z/.freeze
 
+    WHITESPACE_BEGIN = /\A\s/.freeze
+    NON_WHITESPACE = /\S/.freeze
+
     ##
     # Returns false. In the near future when support for the <:> tag has been dropped (in favor of
     # <htx-content>), will return true if Nokogiri's HTML5 parser is available. To use it now, monkey patch
     # this method to return true.
     #
@@ -50,260 +43,366 @@
     ##
     # Returns Nokogiri's HTML5 parser if available and enabled, and Nokogiri's regular HTML parser
     # otherwise.
     #
     def self.nokogiri_parser
-      html5_parser? ? Nokogiri::HTML5::DocumentFragment : Nokogiri::HTML::DocumentFragment
+      html5_parser? ? Nokogiri::HTML5 : Nokogiri::HTML
     end
 
     ##
-    # Creates a new HTX instance.
+    # Creates a new instance.
     #
-    # * +name+ - Name of the template. Conventionally the path of the template file is used for the name,
-    #   but it can be anything.
-    # * +content+ - Template content string.
+    # * +name+ - Template name. Conventionally the path of the template file.
+    # * +content+ - Template content.
     #
     def initialize(name, content)
       # Only stores the arguments; no parsing happens here.
       @name, @content = name, content
     end
 
     ##
     # Compiles the HTX template.
     #
-    # * +indent+ - Indent output by this number of spaces if Numeric, or by this string if a String (if the
-    #   latter, may only contain space and tab characters).
-    # * +assign_to+ - Assign the template function to this JavaScript object instead of the <tt>window</tt>
-    #   object.
+    # * +assign_to+ - JavaScript object to assign the template function to (default: <tt>window</tt>).
+    # * +indent+ - DEPRECATED. Indentation amount (number) or string (must be only spaces or tabs but not
+    #   both) to use for indentation of compiled output (default: indentation of first indented line of
+    #   uncompiled template).
     #
-    def compile(indent: nil, assign_to: 'window')
-      doc = self.class.nokogiri_parser.parse(@content)
-      root_nodes = doc.children.select { |n| n.element? || (n.text? && n.text.strip != '') }
-
-      if (text_node = root_nodes.find(&:text?))
-        raise(MalformedTemplateError.new('text nodes are not allowed at root level', @name, text_node))
-      elsif root_nodes.size == 0
-        raise(MalformedTemplateError.new('a root node is required', @name))
-      elsif root_nodes.size > 1
-        raise(MalformedTemplateError.new("root node already defined on line #{root_nodes[0].line}", @name,
-            root_nodes[1]))
-      end
-
-      @compiled = ''.dup
-      @static_key = 0
-
+    def compile(assign_to: nil, indent: (indent_omitted = true; nil))
+      @assign_to = assign_to || 'window'
       @indent =
         if indent.kind_of?(Numeric)
           ' ' * indent
-        elsif indent.kind_of?(String) && indent !~ /^[ \t]+$/
-          raise("Invalid indent value #{indent.inspect}: only spaces and tabs are allowed")
+        elsif indent && !indent.match?(INDENT_VALID)
+          raise("Invalid indent value #{indent.inspect}: only spaces and tabs (but not both) are allowed")
         else
           indent || @content[INDENT_GUESS] || INDENT_DEFAULT
         end
 
+      warn('The indent: option for HTX template compilation is deprecated.') unless indent_omitted
+
+      @static_key = 0
+      @close_count = 0
+      @whitespace_buff = nil
+      @statement_buff = +''
+      @compiled = +''
+
+      doc = self.class.nokogiri_parser.fragment(@content)
+      preprocess(doc)
       process(doc)
-      @compiled.rstrip!
 
-      <<~EOS
-        #{assign_to}['#{@name}'] = function(htx) {
-        #{@indent}#{@compiled}
-        }
-      EOS
+      @compiled
     end
 
     private
 
     ##
+    # Removes comment nodes and merges any adjoining text nodes that result from such removals.
+    #
+    # * +node+ - Nokogiri node to preprocess.
+    #
+    def preprocess(node)
+      if node.text?
+        if node.parent&.fragment? && node.blank?
+          node.remove
+        elsif (prev_node = node.previous)&.text?
+          prev_node.content += node.content
+          node.remove
+        end
+      elsif node.comment?
+        if node.previous&.text? && node.next&.text? && node.next.content.match?(NEWLINE_BEGIN)
+          content = node.previous.content.sub!(NEWLINE_END_OPTIONAL, '')
+          content.empty? ? node.previous.remove : node.previous.content = content
+        end
+
+        node.remove
+      end
+
+      node.children.each do |child|
+        preprocess(child)
+      end
+
+      if node.fragment?
+        children = node.children
+        root, root2 = children[0..1]
+
+        if (child = children.find(&:text?))
+          raise(MalformedTemplateError.new('text nodes are not allowed at root level', @name, child))
+        elsif !root
+          raise(MalformedTemplateError.new('a root node is required', @name))
+        elsif root2
+          raise(MalformedTemplateError.new("root node already defined on line #{root.line}", @name, root2))
+        end
+      end
+    end
+
+    ##
     # Processes a DOM node's descendents.
     #
-    # * +base+ - Base Nokogiri node to start from.
+    # * +node+ - Nokogiri node to process.
     #
-    def process(base, xmlns: false)
-      base.children.each do |node|
-        next unless node.element? || node.text?
+    def process(node, xmlns: false)
+      if node.fragment?
+        process_fragment_node(node)
+      elsif node.element?
+        process_element_node(node, xmlns: xmlns)
+      elsif node.text?
+        process_text_node(node)
+      else
+        raise(MalformedTemplateError.new("unrecognized node type #{node.class}", @name, node))
+      end
+    end
 
-        dynamic_key = process_value(node.attr(DYNAMIC_KEY_ATTR), :attr)
+    ##
+    # Processes a document fragment node.
+    #
+    # * +node+ - Nokogiri node to process.
+    #
+    def process_fragment_node(node)
+      append("#{@assign_to}['#{@name}'] = function(htx) {")
+      @whitespace_buff = "\n"
 
-        if node.text? || node.name == CONTENT_TAG || node.name == 'htx-text' || node.name == ':'
-          if !node.text? && node.name != CONTENT_TAG
-            warn("#{@name}:#{node.line}: The <#{node.name}> tag has been deprecated. Please use "\
-              "<#{CONTENT_TAG}> for identical functionality.")
-          end
+      node.children.each do |child|
+        process(child)
+      end
 
-          if (node.attributes.size - (dynamic_key ? 1 : 0)) != 0
-            raise(MalformedTemplateError.new("<#{node.name}> tags may not have attributes other than "\
-              "#{DYNAMIC_KEY_ATTR}", @name, node))
-          end
+      append("\n}\n",)
+      flush
+    end
 
-          if (non_text_node = node.children.find { |n| !n.text? })
-            raise(MalformedTemplateError.new("<#{node.name}> tags may not contain child tags", @name,
-              non_text_node))
-          end
+    ##
+    # Processes an element node.
+    #
+    # * +node+ - Nokogiri node to process.
+    # * +xmlns+ - True if node is the descendent of a node with an xmlns attribute.
+    #
+    def process_element_node(node, xmlns: false)
+      children = node.children
+      childless = children.empty? || (children.size == 1 && self.class.formatting_node?(children.first))
+      dynamic_key = self.class.attribute_value(node.attr(DYNAMIC_KEY_ATTR))
+      attributes = self.class.process_attributes(node)
+      xmlns ||= !!self.class.namespace(node)
 
-          text = (node.text? ? node : node.children).text
+      if self.class.htx_content_node?(node)
+        if node.name != CONTENT_TAG
+          warn("#{@name}:#{node.line}: The <#{node.name}> tag has been deprecated. Use <#{CONTENT_TAG}> "\
+            "for identical functionality.")
+        end
 
-          if (value = process_value(text))
-            append(
-              "#{indent(text[LEADING_WHITESPACE])}"\
-              "htx.node(#{[
-                value,
-                dynamic_key,
-                (@static_key += 1) << FLAG_BITS,
-              ].compact.join(', ')})"\
-              "#{indent(text[TRAILING_WHITESPACE])}"
-            )
-          else
-            append(indent(text))
+        if attributes.size > 0
+          raise(MalformedTemplateError.new("<#{node.name}> tags may not have attributes other than "\
+            "#{DYNAMIC_KEY_ATTR}", @name, node))
+        elsif (child = children.find { |n| !n.text? })
+          raise(MalformedTemplateError.new("<#{node.name}> tags may not contain child tags", @name, child))
+        end
+
+        process_text_node(
+          children.first || Nokogiri::XML::Text.new('', node.document),
+          dynamic_key: dynamic_key,
+        )
+      else
+        append_htx_node(
+          "'#{self.class.tag_name(node.name)}'",
+          *attributes,
+          dynamic_key,
+          ELEMENT | (childless ? CHILDLESS : 0) | (xmlns ? XMLNS : 0),
+        )
+
+        unless childless
+          children.each do |child|
+            process(child, xmlns: xmlns)
           end
-        else
-          childless = node.children.empty? || (node.children.size == 1 && node.children[0].text.strip == '')
-          attrs, explicit_xmlns = process_attrs(node)
-          xmlns ||= explicit_xmlns
 
-          append("htx.node(#{[
-            "'#{tag_name(node.name)}'",
-            attrs,
-            dynamic_key,
-            ((@static_key += 1) << FLAG_BITS) | ELEMENT | (childless ? CHILDLESS : 0) | (xmlns ? XMLNS : 0),
-          ].compact.flatten.join(', ')})")
+          @close_count += 1
+        end
+      end
+    end
 
-          unless childless
-            process(node, xmlns: xmlns)
+    ##
+    # Processes a text node.
+    #
+    # * +node+ - Nokogiri node to process.
+    # * +dynamic_key+ - Dynamic key of the parent if it's an <htx-content> node.
+    #
+    def process_text_node(node, dynamic_key: nil)
+      content = node.content
 
-            count = ''
-            @compiled.sub!(CLOSE_STATEMENT) do
-              count = $1 == '' ? 2 : $1.to_i + 1
-              $2
-            end
+      if node.blank?
+        if !content.include?("\n")
+          append_htx_node("`#{content}`")
+        elsif node.next
+          append(content)
+        else
+          @whitespace_buff = content[NEWLINE_END]
+        end
+      else
+        htx_content_node = self.class.htx_content_node?(node.parent)
+        parser = TextParser.new(content, statement_allowed: !htx_content_node)
+        parser.parse
 
-            append("htx.close(#{count})")
-          end
+        append(parser.leading) unless htx_content_node
+
+        if parser.statement?
+          append(indent(parser.content))
+        elsif parser.raw?
+          append_htx_node(indent(parser.content), dynamic_key)
+        else
+          append_htx_node(parser.content, dynamic_key)
         end
+
+        unless htx_content_node
+          append(parser.trailing)
+          @whitespace_buff = parser.whitespace_buff
+        end
       end
     end
 
     ##
     # Appends a string to the compiled template function string with appropriate punctuation and/or
     # whitespace inserted.
     #
     # * +text+ - String to append to the compiled template string.
     #
     def append(text)
-      if @compiled == ''
-        # Do nothing.
-      elsif @compiled !~ END_STATEMENT_END && text !~ BEGIN_STATEMENT_END
-        @compiled << '; '
-      elsif @compiled !~ END_WHITESPACE && text !~ BEGIN_WHITESPACE
-        @compiled << ' '
-      elsif @compiled[-1] == "\n"
-        @compiled << @indent
+      return @compiled if text.nil? || text.empty?
+
+      if @close_count > 0
+        close_count = @close_count
+        @close_count = 0
+
+        append("htx.close(#{close_count unless close_count == 1})")
       end
 
-      @compiled << text
+      if @whitespace_buff
+        @statement_buff << @whitespace_buff
+        @whitespace_buff = nil
+        confirmed_newline = true
+      end
+
+      if (confirmed_newline || @statement_buff.match?(NEWLINE_END)) && !text.match?(NEWLINE_BEGIN)
+        @statement_buff << @indent
+      elsif !@statement_buff.match?(NO_SEMICOLON_END) && !text.match?(NO_SEMICOLON_BEGIN)
+        @statement_buff << ";#{' ' unless text.match?(WHITESPACE_BEGIN)}"
+      end
+
+      flush if text.match?(NON_WHITESPACE)
+      @statement_buff << text
+
+      @compiled
     end
 
     ##
+    # Appends an +htx.node+ call to the compiled template function string.
+    #
+    # * +args+ - Arguments to use for the +htx.node+ call (any +nil+ ones are removed).
+    #
+    def append_htx_node(*args)
+      return if args.first.nil?
+
+      args.compact!
+      args << 0 unless args.last.kind_of?(Integer)
+      args[-1] |= (@static_key += 1) << FLAG_BITS
+
+      append("htx.node(#{args.join(', ')})")
+    end
+
+    ##
+    # Flushes statement buffer.
+    #
+    def flush
+      @compiled << @statement_buff
+      @statement_buff.clear
+
+      @compiled
+    end
+
+    ##
     # Indents each line of a string (except the first).
     #
     # * +text+ - String of lines to indent.
     #
     def indent(text)
-      return '' unless text
-
-      text.gsub!(NEWLINE_NON_BLANK, "\n#{@indent}")
+      text.gsub!(INDENT_REGEX, "\\0#{@indent}")
       text
     end
 
     ##
-    # Processes, formats, and encodes an attribute or text node value. Returns nil if the value is
-    # determined to be a control statement.
+    # Returns true if the node is whitespace containing at least one newline.
     #
-    # * +text+ - String to process.
-    # * +is_attr+ - Truthy if the text is an attribute value.
+    # * +node+ - Nokogiri node to check.
     #
-    def process_value(text, is_attr = false)
-      return nil if text.nil? || (!is_attr && text.strip == '')
-
-      if (value = text[RAW_VALUE, 1])
-        # Entire text is enclosed in ${...}.
-        value.strip!
-        quote = false
-        escape_quotes = false
-      elsif (value = text[TEMPLATE_STRING, 1])
-        # Entire text is enclosed in backticks (template string).
-        quote = true
-        escape_quotes = false
-      elsif is_attr || text.gsub(NON_CONTROL_STATEMENT, '') !~ CONTROL_STATEMENT
-        # Text is an attribute value or doesn't match control statement pattern.
-        value = text.dup
-        quote = true
-        escape_quotes = true
-      else
-        return nil
-      end
-
-      # Strip one leading and trailing newline (and attached spaces) and perform outdent. Outdent amount
-      # calculation ignores everything before the first newline in its search for the least-indented line.
-      outdent = value.scan(NON_BLANK_NON_FIRST_LINE).min
-      value.gsub!(/#{LEADING_WHITESPACE}|#{TRAILING_WHITESPACE}|^#{outdent}/, '')
-      value.gsub!(UNESCAPED_BACKTICK, '\1\\\`') if escape_quotes
-      value.insert(0, '`').insert(-1, '`') if quote
-
-      # Ensure any Unicode characters get converted to Unicode escape sequences. Also note that since
-      # Nokogiri converts HTML entities to Unicode characters, this causes them to be properly passed to
-      # `document.createTextNode` calls as Unicode escape sequences rather than (incorrectly) as HTML
-      # entities.
-      value.encode('ascii', fallback: ->(c) { "\\u#{c.ord.to_s(16).rjust(4, '0')}" })
+    def self.formatting_node?(node)
+      node.blank? && node.content.include?("\n")
     end
 
     ##
-    # Processes a node's attributes, returning two items: a flat array of attribute names and values, and a
-    # boolean indicating whether or not an xmlns attribute is present.
+    # Returns true if the node is an <htx-content> node (or one of its now-deprecated names).
     #
-    # Note: if the node is a <math> or <svg> tag without an explicit xmlns attribute set, an appropriate one
-    # will be automatically added since it is required for those elements to render properly.
+    # * +node+ - Nokogiri node to check.
     #
-    # * +node+ - Nokogiri node to process for attributes.
+    def self.htx_content_node?(node)
+      node && (node.name == CONTENT_TAG || node.name == 'htx-text' || node.name == ':')
+    end
+
+    ##
+    # Processes a node's attributes returning a flat array of attribute names and values.
     #
-    def process_attrs(node)
-      attrs = []
-      xmlns = !!node.attributes['xmlns']
+    # * +node+ - Nokogiri node to process the attributes of.
+    #
+    def self.process_attributes(node)
+      attributes = []
 
-      if !xmlns && DEFAULT_XMLNS[node.name]
-        xmlns = true
-
-        attrs << "'xmlns'"
-        attrs << process_value(DEFAULT_XMLNS[node.name], :attr)
+      if !node.attribute('xmlns') && (xmlns = namespace(node))
+        attributes.push(
+          attribute_name('xmlns'),
+          attribute_value(xmlns)
+        )
       end
 
-      node.attributes.each do |_, attr|
-        next if attr.name == DYNAMIC_KEY_ATTR
+      node.attribute_nodes.each.with_object(attributes) do |attribute, attributes|
+        next if attribute.node_name == DYNAMIC_KEY_ATTR
 
-        attrs << "'#{attr_name(attr.name)}'"
-        attrs << process_value(attr.value, :attr)
+        attributes.push(
+          attribute_name(attribute.node_name),
+          attribute_value(attribute.value)
+        )
       end
+    end
 
-      [attrs, xmlns]
+    ##
+    #
+    #
+    def self.namespace(node)
+      node.namespace&.href || DEFAULT_XMLNS[node.name]
     end
 
     ##
     # Returns the given text if the HTML5 parser is in use, or looks up the value in the tag map to get the
     # correctly-cased version, falling back to the supplied text if no mapping exists.
     #
-    # * +text+ - Tag name as returned by Nokogiri parser.
+    # * +text+ - Tag name as returned by Nokogiri.
     #
-    def tag_name(text)
-      self.class.html5_parser? ? text : (TAG_MAP[text] || text)
+    def self.tag_name(text)
+      html5_parser? ? text : (TAG_MAP[text] || text)
     end
 
     ##
     # Returns the given text if the HTML5 parser is in use, or looks up the value in the attribute map to
     # get the correctly-cased version, falling back to the supplied text if no mapping exists.
     #
-    # * +text+ - Attribute name as returned by Nokogiri parser.
+    # * +text+ - Attribute name as returned by Nokogiri.
     #
-    def attr_name(text)
-      self.class.html5_parser? ? text : (ATTR_MAP[text] || text)
+    def self.attribute_name(text)
+      "'#{html5_parser? ? text : (ATTR_MAP[text] || text)}'"
+    end
+
+    ##
+    # Returns the processed value of an attribute.
+    #
+    # * +text+ - Attribute value as returned by Nokogiri.
+    #
+    def self.attribute_value(text)
+      text ? TextParser.new(text, statement_allowed: false).parse : nil
     end
 
     # The Nokogiri HTML parser downcases all tag and attribute names, but SVG tags and attributes are case
     # sensitive and often mix cased. These maps are used to restore the correct case of such tags and
     # attributes.