lib/table_extractor.rb in markdown_exec-2.4.0 vs lib/table_extractor.rb in markdown_exec-2.5.0

- old
+ new

@@ -1,23 +1,22 @@ # frozen_string_literal: true class TableExtractor # Extract tables from an array of text lines formatted in Markdown style # @param [Array<String>] lines The array of text lines - # @return [Array<Hash>] An array of tables with row count, column count, and start index - def self.extract_tables(lines) + # @return [Array<Hash>] An array of tables with row count, + # column count, and start index + def self.extract_tables(lines, regexp:) tables = [] inside_table = false table_start = nil row_count = 0 column_count = 0 - separator_regexp = /^[ \t]*\|? *(?::?-+:?) *( *\| *(?::?-+:?) *)+\|? *$/ - lines.each_with_index do |line, index| # Match line separators with at least 2 columns - if line.strip.match?(separator_regexp) + if line.strip.match?(regexp) if inside_table # Add the current table before starting a new one tables << { rows: row_count, columns: column_count, @@ -61,30 +60,34 @@ return if $PROGRAM_NAME != __FILE__ require 'minitest/autorun' class TestTableExtractor < Minitest::Test + @@regexp = /^[ \t]*\|? *(?::?-+:?) *( *\| *(?::?-+:?) *)*\|? *$/ + def test_single_table lines = [ '| Species| Genus| Family', '|-|-|-', '| Pongo tapanuliensis| Pongo| Hominidae', '| | Histiophryne| Antennariidae' ] expected = [{ rows: 4, columns: 3, start_index: 0 }] - assert_equal expected, TableExtractor.extract_tables(lines) + assert_equal expected, + TableExtractor.extract_tables(lines, regexp: @@regexp) end def test_indented_table lines = [ "\t | Species| Genus| Family", "\t |-|-|-", "\t | Pongo tapanuliensis| Pongo| Hominidae", "\t | | Histiophryne| Antennariidae" ] expected = [{ rows: 4, columns: 3, start_index: 0 }] - assert_equal expected, TableExtractor.extract_tables(lines) + assert_equal expected, + TableExtractor.extract_tables(lines, regexp: @@regexp) end def test_multiple_tables lines = [ '| Species| Genus| Family', @@ -98,20 +101,22 @@ ] expected = [ { rows: 4, columns: 3, start_index: 0 }, { rows: 3, columns: 2, start_index: 5 } ] - assert_equal expected, TableExtractor.extract_tables(lines) + assert_equal expected, + TableExtractor.extract_tables(lines, regexp: @@regexp) end def test_no_tables lines = [ 'This is a regular line.', 'Another regular line.' ] expected = [] - assert_equal expected, TableExtractor.extract_tables(lines) + assert_equal expected, + TableExtractor.extract_tables(lines, regexp: @@regexp) end def test_inconsistent_columns lines = [ '| Species| Genus| Family', @@ -124,11 +129,12 @@ '| Tapanuli Orangutan| Pongo tapanuliensis' ] # number of columns determined from row of dividers expected = [{ rows: 4, columns: 2, start_index: 0 }, { rows: 3, columns: 3, start_index: 5 }] - assert_equal expected, TableExtractor.extract_tables(lines) + assert_equal expected, + TableExtractor.extract_tables(lines, regexp: @@regexp) end def test_table_at_end_of_lines lines = [ 'Some introductory text.', @@ -136,11 +142,12 @@ '|-|-|-', '| Pongo tapanuliensis| Pongo| Hominidae', '| | Histiophryne| Antennariidae' ] expected = [{ rows: 4, columns: 3, start_index: 1 }] - assert_equal expected, TableExtractor.extract_tables(lines) + assert_equal expected, + TableExtractor.extract_tables(lines, regexp: @@regexp) end def test_table_without_starting_pipe lines = [ 'Some introductory text.', @@ -148,19 +155,21 @@ '|-|-|-', '| Pongo tapanuliensis| Pongo| Hominidae', '| | Histiophryne| Antennariidae' ] expected = [{ rows: 4, columns: 3, start_index: 1 }] - assert_equal expected, TableExtractor.extract_tables(lines) + assert_equal expected, + TableExtractor.extract_tables(lines, regexp: @@regexp) end def test_table_with_colon_hyphens lines = [ '| Name| Age| City', '|:-:|:-|:-:', '| John Doe| 30| New York', '| Jane Doe| 25| Los Angeles' ] expected = [{ rows: 4, columns: 3, start_index: 0 }] - assert_equal expected, TableExtractor.extract_tables(lines) + assert_equal expected, + TableExtractor.extract_tables(lines, regexp: @@regexp) end end