table.rb in asciidoctor-1.5.6

- old
+ new

@@ -12,64 +12,37 @@
       @head = head
       @foot = foot
       @body = body
     end
 
-    alias :[] :send
+    alias [] send
+
+    # Public: Returns the rows grouped by section.
+    #
+    # Creates a 2-dimensional array of two element entries. The first element
+    # is the section name as a symbol. The second element is the Array of rows
+    # in that section. The entries are in document order (head, foot, body).
+    #
+    # Returns a 2-dimensional Array of rows grouped by section.
+    def by_section
+      [[:head, @head], [:foot, @foot], [:body, @body]]
+    end
   end
 
-  # Public: A String key that specifies the default table format in AsciiDoc (psv)
-  DEFAULT_DATA_FORMAT = 'psv'
-
-  # Public: An Array of String keys that represent the table formats in AsciiDoc
-  DATA_FORMATS = ['psv', 'dsv', 'csv']
-
-  # Public: A Hash mapping the AsciiDoc table formats to their default delimiters
-  DEFAULT_DELIMITERS = {
-    'psv' => '|',
-    'dsv' => ':',
-    'csv' => ','
-  }
-
-  # Public: A Hash mapping styles abbreviations to styles that can be applied
-  # to a table column or cell
-  TEXT_STYLES = {
-    'd' => :none,
-    's' => :strong,
-    'e' => :emphasis,
-    'm' => :monospaced,
-    'h' => :header,
-    'l' => :literal,
-    'v' => :verse,
-    'a' => :asciidoc
-  }
-
-  # Public: A Hash mapping alignment abbreviations to alignments (horizontal
-  # and vertial) that can be applies to a table column or cell
-  ALIGNMENTS = {
-    :h => {
-      '<' => 'left',
-      '>' => 'right',
-      '^' => 'center'
-    },
-    :v => {
-      '<' => 'top',
-      '>' => 'bottom',
-      '^' => 'middle'
-    }
-  }
-
   # Public: Get/Set the columns for this table
   attr_accessor :columns
 
   # Public: Get/Set the Rows struct for this table (encapsulates head, foot
   # and body rows)
   attr_accessor :rows
 
   # Public: Boolean specifies whether this table has a header row
   attr_accessor :has_header_option
 
+  # Public: Get the caption for this table
+  attr_reader :caption
+
   def initialize parent, attributes
     super parent, :table
     @rows = Rows.new
     @columns = []
 
@@ -85,10 +58,11 @@
       pcwidth_intval = 100
     end
     @attributes['tablepcwidth'] = pcwidth_intval
 
     if @document.attributes.key? 'pagewidth'
+      # FIXME calculate more accurately (only used in DocBook output)
       @attributes['tableabswidth'] ||=
           ((@attributes['tablepcwidth'].to_f / 100) * @document.attributes['pagewidth']).round
     end
 
     attributes['orientation'] = 'landscape' if attributes.key? 'rotate-option'
@@ -133,25 +107,35 @@
 
     if width_base
       @columns.each {|col| total_width += (col_pcwidth = col.assign_width nil, width_base, pf) }
     else
       col_pcwidth = ((100 * pf / @columns.size).to_i) / pf
+      # or...
+      #col_pcwidth = (100.0 / @columns.size).truncate 4
       col_pcwidth = col_pcwidth.to_i if col_pcwidth.to_i == col_pcwidth
       @columns.each {|col| total_width += col.assign_width col_pcwidth }
     end
 
-    # donate balance, if any, to final column
-    @columns[-1].assign_width(((100 - total_width + col_pcwidth) * pf).round / pf) unless total_width == 100
+    # donate balance, if any, to final column (using half up rounding)
+    unless total_width == 100
+      @columns[-1].assign_width(((100 - total_width + col_pcwidth) * pf).round / pf)
+      # or (manual half up rounding)...
+      #numerator = (raw_numerator = (100 - total_width + col_pcwidth) * pf).to_i
+      #numerator += 1 if raw_numerator >= numerator + 0.5
+      #@columns[-1].assign_width numerator / pf
+      # or...
+      #@columns[-1].assign_width((100 - total_width + col_pcwidth).round 4)
+    end
 
     nil
   end
 
   # Internal: Partition the rows into header, footer and body as determined
   # by the options on the table
   #
   # returns nothing
-  def partition_header_footer(attributes)
+  def partition_header_footer(attrs)
     # set rowcount before splitting up body rows
     @attributes['rowcount'] = @rows.body.size
 
     num_body_rows = @rows.body.size
     if num_body_rows > 0 && @has_header_option
@@ -162,11 +146,11 @@
       # QUESTION why does AsciiDoc use an array for head? is it
       # possible to have more than one based on the syntax?
       @rows.head = [head]
     end
 
-    if num_body_rows > 0 && attributes.key?('footer-option')
+    if num_body_rows > 0 && attrs.key?('footer-option')
       @rows.foot = [@rows.body.pop]
     end
 
     nil
   end
@@ -187,20 +171,22 @@
     attributes['valign'] ||= 'top'
     update_attributes(attributes)
   end
 
   # Public: An alias to the parent block (which is always a Table)
-  alias :table :parent
+  alias table parent
 
   # Internal: Calculate and assign the widths (percentage and absolute) for this column
   #
   # This method assigns the colpcwidth and colabswidth attributes.
   #
   # returns the resolved colpcwidth value
   def assign_width col_pcwidth, width_base = nil, pf = 10000.0
     if width_base
       col_pcwidth = ((@attributes['width'].to_f / width_base) * 100 * pf).to_i / pf
+      # or...
+      #col_pcwidth = (@attributes['width'].to_f * 100.0 / width_base).truncate 4
       col_pcwidth = col_pcwidth.to_i if col_pcwidth.to_i == col_pcwidth
     end
     @attributes['colpcwidth'] = col_pcwidth
     if parent.attributes.key? 'tableabswidth'
       # FIXME calculate more accurately (only used in DocBook output)
@@ -220,62 +206,94 @@
 
   # Public: An Integer of the number of rows this cell will span (default: nil)
   attr_accessor :rowspan
 
   # Public: An alias to the parent block (which is always a Column)
-  alias :column :parent
+  alias column parent
 
   # Public: The internal Asciidoctor::Document for a cell that has the asciidoc style
   attr_reader :inner_document
 
-  def initialize column, text, attributes = {}, cursor = nil
+  def initialize column, cell_text, attributes = {}, opts = {}
     super column, :cell
-    @text = text
-    @style = nil
-    @colspan = nil
-    @rowspan = nil
-    # TODO feels hacky
     if column
-      @style = column.attributes['style']
-      update_attributes(column.attributes)
+      cell_style = (in_header_row = column.table.header_row?) ? nil : column.attributes['style']
+      # REVIEW feels hacky to inherit all attributes from column
+      update_attributes column.attributes
+    else
+      in_header_row = cell_style = nil
     end
     if attributes
-      @colspan = attributes.delete('colspan')
-      @rowspan = attributes.delete('rowspan')
-      # TODO eventualy remove the style attribute from the attributes hash
-      #@style = attributes.delete('style') if attributes.key? 'style'
-      @style = attributes['style'] if attributes.key? 'style'
-      update_attributes(attributes)
+      @colspan = attributes.delete 'colspan'
+      @rowspan = attributes.delete 'rowspan'
+      # TODO eventually remove the style attribute from the attributes hash
+      #cell_style = attributes.delete 'style' unless in_header_row || !(attributes.key? 'style')
+      cell_style = attributes['style'] unless in_header_row || !(attributes.key? 'style')
+      if opts[:strip_text]
+        if cell_style == :literal || cell_style == :verse
+          cell_text = cell_text.rstrip
+          cell_text = cell_text.slice 1, cell_text.length - 1 while cell_text.start_with? LF
+        else
+          cell_text = cell_text.strip
+        end
+      end
+      update_attributes attributes
+    else
+      @colspan = nil
+      @rowspan = nil
     end
-    # only allow AsciiDoc cells in non-header rows
-    if @style == :asciidoc && !column.table.header_row?
+    # NOTE only true for non-header rows
+    if cell_style == :asciidoc
       # FIXME hide doctitle from nested document; temporary workaround to fix
       # nested document seeing doctitle and assuming it has its own document title
       parent_doctitle = @document.attributes.delete('doctitle')
       # NOTE we need to process the first line of content as it may not have been processed
       # the included content cannot expect to match conditional terminators in the remaining
       # lines of table cell content, it must be self-contained logic
-      inner_document_lines = @text.split(EOL)
-      unless inner_document_lines.empty? || !inner_document_lines[0].include?('::')
-        unprocessed_lines = inner_document_lines[0]
-        processed_lines = PreprocessorReader.new(@document, unprocessed_lines).readlines
-        if processed_lines != unprocessed_lines
+      # QUESTION should we reset cell_text to nil?
+      # QUESTION is it faster to check for :: before splitting?
+      inner_document_lines = cell_text.split LF, -1
+      if (unprocessed_line1 = inner_document_lines[0]).include? '::'
+        preprocessed_lines = (PreprocessorReader.new @document, [unprocessed_line1]).readlines
+        unless unprocessed_line1 == preprocessed_lines[0] && preprocessed_lines.size < 2
           inner_document_lines.shift
-          inner_document_lines.unshift(*processed_lines)
+          inner_document_lines.unshift(*preprocessed_lines) unless preprocessed_lines.empty?
         end
-      end
-      @inner_document = Document.new(inner_document_lines, :header_footer => false, :parent => @document, :cursor => cursor)
+      end unless inner_document_lines.empty?
+      @inner_document = Document.new(inner_document_lines, :header_footer => false, :parent => @document, :cursor => opts[:cursor])
       @document.attributes['doctitle'] = parent_doctitle unless parent_doctitle.nil?
     end
+    @text = cell_text
+    @style = cell_style
   end
 
-  # Public: Get the text with normal substitutions applied for this cell. Used for cells in the head rows
+  # Public: Get the String text of this cell with substitutions applied.
+  #
+  # Used for cells in the head row as well as text-only (non-AsciiDoc) cells in
+  # the foot row and body.
+  #
+  # This method shouldn't be used for cells that have the AsciiDoc style.
+  #
+  # Returns the converted String text for this Cell
   def text
-    apply_normal_subs(@text).strip
+    apply_subs @text, (@style == :literal ? BASIC_SUBS : NORMAL_SUBS)
   end
 
+  # Public: Set the String text.
+  #
+  # This method shouldn't be used for cells that have the AsciiDoc style.
+  #
+  # Returns the new String text assigned to this Cell
+  def text= val
+    @text = val
+  end
+
   # Public: Handles the body data (tbody, tfoot), applying styles and partitioning into paragraphs
+  #
+  # This method should not be used for cells in the head row or that have the literal or verse style.
+  #
+  # Returns the converted String for this Cell
   def content
     if @style == :asciidoc
       @inner_document.convert
     else
       text.split(BlankLineRx).map do |p|
@@ -294,15 +312,28 @@
 # moves through the lines of the table using tail recursion. When a cell boundary
 # is located, the previous cell is closed, an instance of Table::Cell is
 # instantiated, the row is closed if the cell satisfies the column count and,
 # finally, a new buffer is allocated to track the next cell.
 class Table::ParserContext
+  # Public: A Set of String keys that represent the table formats in AsciiDoc
+  #--
+  # QUESTION should we recognize !sv as a valid format value?
+  FORMATS = ['psv', 'csv', 'dsv', 'tsv'].to_set
 
+  # Public: A Hash mapping each AsciiDoc table format to its default delimiter, stored as a [String, Regexp] pair
+  DELIMITERS = {
+    'psv' => ['|', /\|/],
+    'csv' => [',', /,/],
+    'dsv' => [':', /:/],
+    'tsv' => [%(\t), /\t/],
+    '!sv' => ['!', /!/]
+  }
+
   # Public: The Table currently being parsed
   attr_accessor :table
 
-  # Public: The AsciiDoc table format (psv, dsv or csv)
+  # Public: The AsciiDoc table format (psv, dsv, or csv)
   attr_accessor :format
 
   # Public: Get the expected column count for a row
   #
   # colcount is the number of columns to pull into a row
@@ -317,29 +348,45 @@
   attr_reader :delimiter
 
   # Public: The cell delimiter compiled Regexp for this table.
   attr_reader :delimiter_re
 
-  def initialize(reader, table, attributes = {})
+  def initialize reader, table, attributes = {}
     @reader = reader
     @table = table
-    # TODO if reader.cursor becomes a reference, this would require .dup
+    # IMPORTANT if reader.cursor becomes a reference, this assignment would require .dup
     @last_cursor = reader.cursor
-    if (@format = attributes['format'])
-      unless Table::DATA_FORMATS.include? @format
-        raise %(Illegal table format: #{@format})
+
+    if attributes.key? 'format'
+      if FORMATS.include?(xsv = attributes['format'])
+        if xsv == 'tsv'
+          # NOTE tsv is just an alias for csv with a tab separator
+          @format = 'csv'
+        elsif (@format = xsv) == 'psv' && table.document.nested?
+          xsv = '!sv'
+        end
+      else
+        warn %(asciidoctor: ERROR: #{reader.prev_line_info}: illegal table format: #{xsv})
+        @format, xsv = 'psv', (table.document.nested? ? '!sv' : 'psv')
       end
     else
-      @format = Table::DEFAULT_DATA_FORMAT
+      @format, xsv = 'psv', (table.document.nested? ? '!sv' : 'psv')
     end
 
-    @delimiter = if @format == 'psv' && !(attributes.key? 'separator') && table.document.nested?
-      '!'
+    if attributes.key? 'separator'
+      if (sep = attributes['separator']).nil_or_empty?
+        @delimiter, @delimiter_re = DELIMITERS[xsv]
+      # QUESTION should we support any other escape codes or multiple tabs?
+      elsif sep == '\t'
+        @delimiter, @delimiter_re = DELIMITERS['tsv']
+      else
+        @delimiter, @delimiter_re = sep, /#{::Regexp.escape sep}/
+      end
     else
-      attributes['separator'] || Table::DEFAULT_DELIMITERS[@format]
+      @delimiter, @delimiter_re = DELIMITERS[xsv]
     end
-    @delimiter_re = /#{Regexp.escape @delimiter}/
+
     @colcount = table.columns.empty? ? -1 : table.columns.size
     @buffer = ''
     @cellspecs = []
     @cell_open = false
     @active_rowspans = [0]
@@ -362,37 +409,42 @@
   # returns Regexp MatchData if the line contains the delimiter, false otherwise
   def match_delimiter(line)
     @delimiter_re.match(line)
   end
 
-  # Public: Skip beyond the matched delimiter because it was a false positive
-  # (either because it was escaped or in a quoted context)
+  # Public: Skip past the matched delimiter because it's inside quoted text.
   #
   # returns the String after the match
-  def skip_matched_delimiter(match, escaped = false)
-    @buffer = %(#{@buffer}#{escaped ? match.pre_match.chop : match.pre_match}#{@delimiter})
+  def skip_past_delimiter(match)
+    @buffer = %(#{@buffer}#{match.pre_match}#{@delimiter})
     match.post_match
   end
 
+  # Public: Skip past the matched delimiter because it's escaped.
+  #
+  # returns the String after the match
+  def skip_past_escaped_delimiter(match)
+    @buffer = %(#{@buffer}#{match.pre_match.chop}#{@delimiter})
+    match.post_match
+  end
+
   # Public: Determines whether the buffer has unclosed quotes. Used for CSV data.
   #
   # returns true if the buffer has unclosed quotes, false if it doesn't or it
   # isn't quoted data
-  def buffer_has_unclosed_quotes?(append = nil)
-    record = %(#{@buffer}#{append}).strip
-    record.start_with?('"') && !record.start_with?('""') && !record.end_with?('"')
+  def buffer_has_unclosed_quotes? append = nil
+    if (record = append ? (buffer + append).strip : buffer.strip).start_with? '"'
+      if ((trailing_quote = record.end_with? '"') && (record.end_with? '""')) || (record.start_with? '""')
+        ((record = record.gsub '""', '').start_with? '"') && !(record.end_with? '"')
+      else
+        !trailing_quote
+      end
+    else
+      false
+    end
   end
 
-  # Public: Determines whether the buffer contains quoted data. Used for CSV data.
-  #
-  # returns true if the buffer starts with a double quote (and not an escaped double quote),
-  # false otherwise
-  def buffer_quoted?
-    @buffer = @buffer.lstrip
-    @buffer.start_with?('"') && !@buffer.start_with?('""')
-  end
-
   # Public: Takes a cell spec from the stack. Cell specs precede the delimiter, so a
   # stack is used to carry over the spec from the previous cell to the current cell
   # when the cell is being closed.
   #
   # returns The cell spec Hash captured from parsing the previous cell
@@ -458,34 +510,37 @@
   # the current row and, if the number of expected columns for the current
   # row has been met, close the row and begin a new one.
   #
   # returns nothing
   def close_cell(eol = false)
-    cell_text = @buffer.strip
-    @buffer = ''
     if @format == 'psv'
-      cellspec = take_cellspec
-      if cellspec
+      strip_text = true
+      cell_text = @buffer
+      @buffer = ''
+      if (cellspec = take_cellspec)
         repeat = cellspec.delete('repeatcol') || 1
       else
         warn %(asciidoctor: ERROR: #{@last_cursor.line_info}: table missing leading separator, recovering automatically)
         cellspec = {}
         repeat = 1
       end
     else
+      strip_text = false
+      cell_text = @buffer.strip
+      @buffer = ''
       cellspec = nil
       repeat = 1
       if @format == 'csv'
         if !cell_text.empty? && cell_text.include?('"')
           # this may not be perfect logic, but it hits the 99%
           if cell_text.start_with?('"') && cell_text.end_with?('"')
             # unquote
             cell_text = cell_text[1...-1].strip
           end
 
-          # collapses escaped quotes
-          cell_text = cell_text.tr_s('"', '"')
+          # collapse escaped quotes
+          cell_text = cell_text.squeeze('"')
         end
       end
     end
 
     1.upto(repeat) do |i|
@@ -504,10 +559,10 @@
           warn %(asciidoctor: ERROR: #{@last_cursor.line_info}: dropping cell because it exceeds specified number of columns)
           return
         end
       end
 
-      cell = Table::Cell.new(column, cell_text, cellspec, @last_cursor)
+      cell = Table::Cell.new(column, cell_text, cellspec, :cursor => @last_cursor, :strip_text => strip_text)
       @last_cursor = @reader.cursor
       unless !cell.rowspan || cell.rowspan == 1
         activate_rowspan(cell.rowspan, (cell.colspan || 1))
       end
       @column_visits += (cell.colspan || 1)