# frozen_string_literal: true module Asciidoctor # Public: Methods and constants for managing AsciiDoc table content in a document. # It supports all three of AsciiDoc's table formats: psv, dsv and csv. class Table < AbstractBlock # precision of column widths DEFAULT_PRECISION = 4 # Public: A data object that encapsulates the collection of rows (head, foot, body) for a table class Rows attr_accessor :head, :foot, :body def initialize head = [], foot = [], body = [] @head = head @foot = foot @body = body end alias [] send # Public: Retrieve the rows grouped by section as a nested Array. # # Creates a 2-dimensional array of two element entries. The first element # is the section name as a symbol. The second element is the Array of rows # in that section. The entries are in document order (head, foot, body). # # Returns a 2-dimentional Array of rows grouped by section. def by_section [[:head, @head], [:body, @body], [:foot, @foot]] end # Public: Retrieve the rows as a Hash. # # The keys are the names of the section groups and the values are the Array of rows in that section. # The keys are in document order (head, foot, body). # # Returns a Hash of rows grouped by section. def to_h { head: @head, body: @body, foot: @foot } end end # Public: Get/Set the columns for this table attr_accessor :columns # Public: Get/Set the Rows struct for this table (encapsulates head, foot # and body rows) attr_accessor :rows # Public: Boolean specifies whether this table has a header row attr_accessor :has_header_option # Public: Get the caption for this table attr_reader :caption def initialize parent, attributes super parent, :table @rows = Rows.new @columns = [] @has_header_option = attributes['header-option'] ? true : false # smells like we need a utility method here # to resolve an integer width from potential bogus input if (pcwidth = attributes['width']) if (pcwidth_intval = pcwidth.to_i) > 100 || pcwidth_intval < 1 pcwidth_intval = 100 unless pcwidth_intval == 0 && (pcwidth == '0' || pcwidth == '0%') end else pcwidth_intval = 100 end @attributes['tablepcwidth'] = pcwidth_intval if @document.attributes['pagewidth'] @attributes['tableabswidth'] = (abswidth_val = (((pcwidth_intval / 100.0) * @document.attributes['pagewidth'].to_f).truncate DEFAULT_PRECISION)) == abswidth_val.to_i ? abswidth_val.to_i : abswidth_val end @attributes['orientation'] = 'landscape' if attributes['rotate-option'] end # Internal: Returns whether the current row being processed is # the header row def header_row? @has_header_option && @rows.body.empty? end # Internal: Creates the Column objects from the column spec # # returns nothing def create_columns colspecs cols = [] autowidth_cols = nil width_base = 0 colspecs.each do |colspec| colwidth = colspec['width'] cols << (Column.new self, cols.size, colspec) if colwidth < 0 (autowidth_cols ||= []) << cols[-1] else width_base += colwidth end end if (num_cols = (@columns = cols).size) > 0 @attributes['colcount'] = num_cols width_base = nil unless width_base > 0 || autowidth_cols assign_column_widths width_base, autowidth_cols end nil end # Internal: Assign column widths to columns # # This method rounds the percentage width values to 4 decimal places and # donates the balance to the final column. # # This method assumes there's at least one column in the columns array. # # width_base - the total of the relative column values used for calculating percentage widths (default: nil) # # returns nothing def assign_column_widths width_base = nil, autowidth_cols = nil precision = DEFAULT_PRECISION total_width = col_pcwidth = 0 if width_base if autowidth_cols if width_base > 100 autowidth = 0 logger.warn %(total column width must not exceed 100% when using autowidth columns; got #{width_base}%) else autowidth = ((100.0 - width_base) / autowidth_cols.size).truncate precision autowidth = autowidth.to_i if autowidth.to_i == autowidth width_base = 100 end autowidth_attrs = { 'width' => autowidth, 'autowidth-option' => '' } autowidth_cols.each {|col| col.update_attributes autowidth_attrs } end @columns.each {|col| total_width += (col_pcwidth = col.assign_width nil, width_base, precision) } else col_pcwidth = (100.0 / @columns.size).truncate precision col_pcwidth = col_pcwidth.to_i if col_pcwidth.to_i == col_pcwidth @columns.each {|col| total_width += col.assign_width col_pcwidth, nil, precision } end # donate balance, if any, to final column (using half up rounding) @columns[-1].assign_width(((100 - total_width + col_pcwidth).round precision), nil, precision) unless total_width == 100 nil end # Internal: Partition the rows into header, footer and body as determined # by the options on the table # # returns nothing def partition_header_footer(attrs) # set rowcount before splitting up body rows @attributes['rowcount'] = @rows.body.size num_body_rows = @rows.body.size if num_body_rows > 0 && @has_header_option head = @rows.body.shift num_body_rows -= 1 # styles aren't applied to header row head.each {|c| c.style = nil } # QUESTION why does AsciiDoc use an array for head? is it # possible to have more than one based on the syntax? @rows.head = [head] end if num_body_rows > 0 && attrs['footer-option'] @rows.foot = [@rows.body.pop] end nil end end # Public: Methods to manage the columns of an AsciiDoc table. In particular, it # keeps track of the column specs class Table::Column < AbstractNode # Public: Get/Set the style Symbol for this column. attr_accessor :style def initialize table, index, attributes = {} super table, :table_column @style = attributes['style'] attributes['colnumber'] = index + 1 attributes['width'] ||= 1 attributes['halign'] ||= 'left' attributes['valign'] ||= 'top' update_attributes(attributes) end # Public: An alias to the parent block (which is always a Table) alias table parent # Internal: Calculate and assign the widths (percentage and absolute) for this column # # This method assigns the colpcwidth and colabswidth attributes. # # returns the resolved colpcwidth value def assign_width col_pcwidth, width_base, precision if width_base col_pcwidth = (@attributes['width'].to_f * 100.0 / width_base).truncate precision col_pcwidth = col_pcwidth.to_i if col_pcwidth.to_i == col_pcwidth end if parent.attributes['tableabswidth'] @attributes['colabswidth'] = (col_abswidth = ((col_pcwidth / 100.0) * parent.attributes['tableabswidth']).truncate precision) == col_abswidth.to_i ? col_abswidth.to_i : col_abswidth end @attributes['colpcwidth'] = col_pcwidth end def block? false end def inline? false end end # Public: Methods for managing the a cell in an AsciiDoc table. class Table::Cell < AbstractBlock DOUBLE_LF = LF * 2 # Public: An Integer of the number of columns this cell will span (default: nil) attr_accessor :colspan # Public: An Integer of the number of rows this cell will span (default: nil) attr_accessor :rowspan # Public: An alias to the parent block (which is always a Column) alias column parent # Internal: Returns the nested Document in an AsciiDoc table cell (only set when style is :asciidoc) attr_reader :inner_document def initialize column, cell_text, attributes = {}, opts = {} super column, :table_cell @source_location = opts[:cursor].dup if @document.sourcemap if column cell_style = column.attributes['style'] unless (in_header_row = column.table.header_row?) # REVIEW feels hacky to inherit all attributes from column update_attributes column.attributes end # NOTE if attributes is defined, we know this is a psv cell; implies text needs to be stripped if attributes if attributes.empty? @colspan = @rowspan = nil else @colspan, @rowspan = (attributes.delete 'colspan'), (attributes.delete 'rowspan') # TODO delete style attribute from @attributes if set cell_style = attributes['style'] || cell_style unless in_header_row update_attributes attributes end if cell_style == :asciidoc asciidoc = true inner_document_cursor = opts[:cursor] if (cell_text = cell_text.rstrip).start_with? LF lines_advanced = 1 lines_advanced += 1 while (cell_text = cell_text.slice 1, cell_text.length).start_with? LF # NOTE this only works if we remain in the same file inner_document_cursor.advance lines_advanced else cell_text = cell_text.lstrip end elsif cell_style == :literal literal = true cell_text = cell_text.rstrip # QUESTION should we use same logic as :asciidoc cell? strip leading space if text doesn't start with newline? cell_text = cell_text.slice 1, cell_text.length while cell_text.start_with? LF else normal_psv = true # NOTE AsciidoctorJ uses nil cell_text to create an empty cell cell_text = cell_text ? cell_text.strip : '' end else @colspan = @rowspan = nil if cell_style == :asciidoc asciidoc = true inner_document_cursor = opts[:cursor] end end # NOTE only true for non-header rows if asciidoc # FIXME hide doctitle from nested document; temporary workaround to fix # nested document seeing doctitle and assuming it has its own document title parent_doctitle = @document.attributes.delete('doctitle') # NOTE we need to process the first line of content as it may not have been processed # the included content cannot expect to match conditional terminators in the remaining # lines of table cell content, it must be self-contained logic # QUESTION should we reset cell_text to nil? # QUESTION is is faster to check for :: before splitting? inner_document_lines = cell_text.split LF, -1 if (unprocessed_line1 = inner_document_lines[0]).include? '::' preprocessed_lines = (PreprocessorReader.new @document, [unprocessed_line1]).readlines unless unprocessed_line1 == preprocessed_lines[0] && preprocessed_lines.size < 2 inner_document_lines.shift inner_document_lines.unshift(*preprocessed_lines) unless preprocessed_lines.empty? end end unless inner_document_lines.empty? @inner_document = Document.new inner_document_lines, standalone: false, parent: @document, cursor: inner_document_cursor @document.attributes['doctitle'] = parent_doctitle unless parent_doctitle.nil? @subs = nil elsif literal @content_model = :verbatim @subs = BASIC_SUBS else if normal_psv && (cell_text.start_with? '[[') && LeadingInlineAnchorRx =~ cell_text Parser.catalog_inline_anchor $1, $2, self, opts[:cursor], @document end @content_model = :simple @subs = NORMAL_SUBS end @text = cell_text @style = cell_style end # Public: Get the String text of this cell with substitutions applied. # # Used for cells in the head row as well as text-only (non-AsciiDoc) cells in # the foot row and body. # # This method shouldn't be used for cells that have the AsciiDoc style. # # Returns the converted String text for this Cell def text apply_subs @text, @subs end # Public: Set the String text. # # This method shouldn't be used for cells that have the AsciiDoc style. # # Returns the new String text assigned to this Cell def text= val @text = val end # Public: Handles the body data (tbody, tfoot), applying styles and partitioning into paragraphs # # This method should not be used for cells in the head row or that have the literal or verse style. # # Returns the converted String for this Cell def content if (cell_style = @style) == :asciidoc @inner_document.convert elsif @text.include? DOUBLE_LF (text.split BlankLineRx).map do |para| cell_style && cell_style != :header ? (Inline.new parent, :quoted, para, type: cell_style).convert : para end elsif (subbed_text = text).empty? [] elsif cell_style && cell_style != :header [(Inline.new parent, :quoted, subbed_text, type: cell_style).convert] else [subbed_text] end end def lines @text.split LF end def source @text end # Public: Get the source file where this block started def file @source_location && @source_location.file end # Public: Get the source line number where this block started def lineno @source_location && @source_location.lineno end def to_s "#{super.to_s} - [text: #@text, colspan: #{@colspan || 1}, rowspan: #{@rowspan || 1}, attributes: #@attributes]" end end # Public: Methods for managing the parsing of an AsciiDoc table. Instances of this # class are primarily responsible for tracking the buffer of a cell as the parser # moves through the lines of the table using tail recursion. When a cell boundary # is located, the previous cell is closed, an instance of Table::Cell is # instantiated, the row is closed if the cell satisifies the column count and, # finally, a new buffer is allocated to track the next cell. class Table::ParserContext include Logging # Public: An Array of String keys that represent the table formats in AsciiDoc #-- # QUESTION should we recognize !sv as a valid format value? FORMATS = ['psv', 'csv', 'dsv', 'tsv'].to_set # Public: A Hash mapping the AsciiDoc table formats to default delimiters DELIMITERS = { 'psv' => ['|', /\|/], 'csv' => [',', /,/], 'dsv' => [':', /:/], 'tsv' => [?\t, /\t/], '!sv' => ['!', /!/], } # Public: The Table currently being parsed attr_accessor :table # Public: The AsciiDoc table format (psv, dsv, or csv) attr_accessor :format # Public: Get the expected column count for a row # # colcount is the number of columns to pull into a row # A value of -1 means we use the number of columns found # in the first line as the colcount attr_reader :colcount # Public: The String buffer of the currently open cell attr_accessor :buffer # Public: The cell delimiter for this table. attr_reader :delimiter # Public: The cell delimiter compiled Regexp for this table. attr_reader :delimiter_re def initialize reader, table, attributes = {} @start_cursor_data = (@reader = reader).mark @table = table if attributes.key? 'format' if FORMATS.include?(xsv = attributes['format']) if xsv == 'tsv' # NOTE tsv is just an alias for csv with a tab separator @format = 'csv' elsif (@format = xsv) == 'psv' && table.document.nested? xsv = '!sv' end else logger.error message_with_context %(illegal table format: #{xsv}), source_location: reader.cursor_at_prev_line @format, xsv = 'psv', (table.document.nested? ? '!sv' : 'psv') end else @format, xsv = 'psv', (table.document.nested? ? '!sv' : 'psv') end if attributes.key? 'separator' if (sep = attributes['separator']).nil_or_empty? @delimiter, @delimiter_rx = DELIMITERS[xsv] # QUESTION should we support any other escape codes or multiple tabs? elsif sep == '\t' @delimiter, @delimiter_rx = DELIMITERS['tsv'] else @delimiter, @delimiter_rx = sep, /#{::Regexp.escape sep}/ end else @delimiter, @delimiter_rx = DELIMITERS[xsv] end @colcount = table.columns.empty? ? -1 : table.columns.size @buffer = '' @cellspecs = [] @cell_open = false @active_rowspans = [0] @column_visits = 0 @current_row = [] @linenum = -1 end # Public: Checks whether the line provided starts with the cell delimiter # used by this table. # # returns true if the line starts with the delimiter, false otherwise def starts_with_delimiter?(line) line.start_with? @delimiter end # Public: Checks whether the line provided contains the cell delimiter # used by this table. # # returns Regexp MatchData if the line contains the delimiter, false otherwise def match_delimiter(line) @delimiter_rx.match(line) end # Public: Skip past the matched delimiter because it's inside quoted text. # # Returns nothing def skip_past_delimiter(pre) @buffer = %(#{@buffer}#{pre}#{@delimiter}) nil end # Public: Skip past the matched delimiter because it's escaped. # # Returns nothing def skip_past_escaped_delimiter(pre) @buffer = %(#{@buffer}#{pre.chop}#{@delimiter}) nil end # Public: Determines whether the buffer has unclosed quotes. Used for CSV data. # # returns true if the buffer has unclosed quotes, false if it doesn't or it # isn't quoted data def buffer_has_unclosed_quotes? append = nil if (record = append ? (@buffer + append).strip : @buffer.strip) == '"' true elsif record.start_with? '"' if ((trailing_quote = record.end_with? '"') && (record.end_with? '""')) || (record.start_with? '""') ((record = record.gsub '""', '').start_with? '"') && !(record.end_with? '"') else !trailing_quote end else false end end # Public: Takes a cell spec from the stack. Cell specs precede the delimiter, so a # stack is used to carry over the spec from the previous cell to the current cell # when the cell is being closed. # # returns The cell spec Hash captured from parsing the previous cell def take_cellspec @cellspecs.shift end # Public: Puts a cell spec onto the stack. Cell specs precede the delimiter, so a # stack is used to carry over the spec to the next cell. # # returns nothing def push_cellspec(cellspec = {}) # this shouldn't be nil, but we check anyway @cellspecs << (cellspec || {}) nil end # Public: Marks that the cell should be kept open. Used when the end of the line is # reached and the cell may contain additional text. # # returns nothing def keep_cell_open @cell_open = true nil end # Public: Marks the cell as closed so that the parser knows to instantiate a new cell # instance and add it to the current row. # # returns nothing def mark_cell_closed @cell_open = false nil end # Public: Checks whether the current cell is still open # # returns true if the cell is marked as open, false otherwise def cell_open? @cell_open end # Public: Checks whether the current cell has been marked as closed # # returns true if the cell is marked as closed, false otherwise def cell_closed? !@cell_open end # Public: If the current cell is open, close it. In additional, push the # cell spec captured from the end of this cell onto the stack for use # by the next cell. # # returns nothing def close_open_cell(next_cellspec = {}) push_cellspec next_cellspec close_cell(true) if cell_open? advance nil end # Public: Close the current cell, instantiate a new Table::Cell, add it to # the current row and, if the number of expected columns for the current # row has been met, close the row and begin a new one. # # returns nothing def close_cell(eol = false) if @format == 'psv' cell_text = @buffer @buffer = '' if (cellspec = take_cellspec) repeat = cellspec.delete('repeatcol') || 1 else logger.error message_with_context 'table missing leading separator; recovering automatically', source_location: Reader::Cursor.new(*@start_cursor_data) cellspec = {} repeat = 1 end else cell_text = @buffer.strip @buffer = '' cellspec = nil repeat = 1 if @format == 'csv' && !cell_text.empty? && cell_text.include?('"') # this may not be perfect logic, but it hits the 99% if cell_text.start_with?('"') && cell_text.end_with?('"') # unquote if (cell_text = cell_text.slice(1, cell_text.length - 2)) # trim whitespace and collapse escaped quotes cell_text = cell_text.strip.squeeze('"') else logger.error message_with_context 'unclosed quote in CSV data; setting cell to empty', source_location: @reader.cursor_at_prev_line cell_text = '' end else # collapse escaped quotes cell_text = cell_text.squeeze('"') end end end 1.upto(repeat) do |i| # TODO make column resolving an operation if @colcount == -1 @table.columns << (column = Table::Column.new(@table, @table.columns.size + i - 1)) if cellspec && (cellspec.key? 'colspan') && (extra_cols = cellspec['colspan'].to_i - 1) > 0 offset = @table.columns.size extra_cols.times do |j| @table.columns << Table::Column.new(@table, offset + j) end end else # QUESTION is this right for cells that span columns? unless (column = @table.columns[@current_row.size]) logger.error message_with_context 'dropping cell because it exceeds specified number of columns', source_location: @reader.cursor_before_mark return end end cell = Table::Cell.new(column, cell_text, cellspec, cursor: @reader.cursor_before_mark) @reader.mark unless !cell.rowspan || cell.rowspan == 1 activate_rowspan(cell.rowspan, (cell.colspan || 1)) end @column_visits += (cell.colspan || 1) @current_row << cell # don't close the row if we're on the first line and the column count has not been set explicitly # TODO perhaps the colcount/linenum logic should be in end_of_row? (or a should_end_row? method) close_row if end_of_row? && (@colcount != -1 || @linenum > 0 || (eol && i == repeat)) end @cell_open = false nil end private # Internal: Close the row by adding it to the Table and resetting the row # Array and counter variables. # # returns nothing def close_row @table.rows.body << @current_row # don't have to account for active rowspans here # since we know this is first row @colcount = @column_visits if @colcount == -1 @column_visits = 0 @current_row = [] @active_rowspans.shift @active_rowspans[0] ||= 0 nil end # Internal: Activate a rowspan. The rowspan Array is consulted when # determining the effective number of cells in the current row. # # returns nothing def activate_rowspan(rowspan, colspan) 1.upto(rowspan - 1) {|i| @active_rowspans[i] = (@active_rowspans[i] || 0) + colspan } nil end # Internal: Check whether we've met the number of effective columns for the current row. def end_of_row? @colcount == -1 || effective_column_visits == @colcount end # Internal: Calculate the effective column visits, which consists of the number of # cells plus any active rowspans. def effective_column_visits @column_visits + @active_rowspans[0] end # Internal: Advance to the next line (which may come after the parser begins processing # the next line if the last cell had wrapped content). def advance @linenum += 1 end end end