# A first attempt at breaking raw markdown down into a tree / hierarchy
# of headings, so that it can be used to build other entities.
module Brief
  class Parser
    attr_accessor :content, :options, :raw_tree, :checksum

    # Regexes
    HeadingRegex = /^#+/

    # Exceptions. These derive from StandardError so that a plain `rescue`
    # can handle them; they are still caught by `rescue Exception`.
    MissingHeadings   = Class.new(StandardError)
    HeadingsNotUnique = Class.new(StandardError)

    # @param content [String] raw markdown text
    # @param options [Hash] :checksum overrides the MD5 digest of content
    def initialize(content = "", options = {})
      @options  = options.dup
      @content  = content
      # Block form: the digest is only computed when no checksum was given.
      @checksum = options.fetch(:checksum) { Digest::MD5.hexdigest(content) }

      scan
      parse
    end

    # Memoized Brief::Tree built from a frozen copy of the tree nodes.
    def tree
      @tree ||= Brief::Tree.new(tree_nodes.dup.freeze)
    end

    # Items of the tree, ordered by sort_index unless sorted is false.
    def items(sorted = true)
      sorted ? tree.items.sort_by(&:sort_index) : tree.items
    end

    # Headings whose level equals +level+, ordered by sort_index.
    # Returns the line objects when options[:as_objects] is true, otherwise
    # their content.
    def headings_at_level(level, options = {})
      as_objects = options.fetch(:as_objects, false)
      found = heading_lines.select { |line| line.level == level }.sort_by(&:sort_index)
      as_objects ? found : found.map(&:content)
    end

    # First heading after +heading_line+ whose level is the same or
    # shallower (numerically <=). Accepts a heading line or its title.
    def next_sibling_of(heading_line)
      heading_line = resolve_heading(heading_line)

      heading_lines.detect do |candidate|
        candidate.level <= heading_line.level && candidate.number > heading_line.number
      end
    end

    # Stripped raw text of every parsed line between +heading_line+ and its
    # next sibling (or the last line), joined without a separator.
    def body_content_for(heading_line)
      heading_line = resolve_heading(heading_line)

      min = heading_line.number + 1
      max = next_sibling_of(heading_line).try(:number).try(:-, 1) || last_line_number

      parsed_lines.select { |line| line.number.between?(min, max) }
                  .map { |line| line.raw.strip }
                  .join("")
    end

    # For each range in +bounds+, the raw text of the parsed lines it
    # covers. Range values are treated as 1-based positions.
    def lines_between_boundaries(*bounds)
      bounds.map do |range|
        offset = range.min - 1
        count  = range.max - offset
        parsed_lines.slice(offset, count).map(&:raw)
      end
    end

    def last_line_number
      parsed_lines.last.try(:number) || raw_lines.length + 1
    end

    def parser
      self
    end

    # Pulls a leading ```settings fenced block out of the raw lines, stores
    # its body as the front matter source, and rewrites @content without it.
    # Does nothing when the first fence is not a settings fence or when the
    # fence is never closed.
    def extract_frontmatter!
      return unless raw_lines.any? { |line| line.match(/```settings/) }

      markers = identify_codemarkers
      # An opening and a closing fence are both required.
      return if markers.length < 2
      return unless raw_lines[markers.first].to_s.match(/```settings/)

      length = markers[1] - markers[0]
      @front_matter = raw_lines.slice(markers[0] + 1, length - 1).join
      remove_raw_lines(markers[0], markers[1])
      # \A / \z trim only the very start and end of the document; interior
      # blank lines are preserved.
      @content = raw_lines.join.gsub(/\A\n+|\n+\z/, '')
    end

    # Parsed front matter; an empty Hash when there is none or when it
    # cannot be loaded.
    # NOTE(review): YAML.load runs on text taken from the document; prefer
    # YAML.safe_load if documents can come from untrusted sources.
    def front_matter
      return {} unless @front_matter

      YAML.load(@front_matter)
    rescue StandardError
      {}
    end

    # Indexes (0-based) of raw lines matching Brief::Line::CodeBlockRegex.
    def identify_codemarkers
      raw_lines.each_index.select do |index|
        raw_lines[index].match(Brief::Line::CodeBlockRegex)
      end
    end

    # Extracts front matter, then records code fence and heading positions.
    def scan
      extract_frontmatter!
      @code_markers    = identify_codemarkers
      @heading_markers = identify_heading_markers
      @scanned = true
    end

    # Indexes (0-based) of raw lines that match Brief::Line::HeadingRegex
    # and are not reported as inside a code region.
    def identify_heading_markers
      raw_lines.each_index.select do |index|
        raw_lines[index].match(Brief::Line::HeadingRegex) && !is_line_inside_code_region?(index)
      end
    end

    # Heading markers paired up into ranges; nil before a scan.
    def heading_boundaries
      marker_ranges(@heading_markers)
    end

    # Code fence markers paired up into ranges; nil before a scan.
    def code_boundaries
      marker_ranges(@code_markers)
    end

    def valid?
      @validated.presence || validate
    end

    # @raise [MissingHeadings] when there are no heading lines
    # @raise [HeadingsNotUnique] when heading lines are not unique
    def validate
      scan unless @scanned

      raise MissingHeadings, 'A Brief must include some headings' if heading_lines.empty?

      # NOTE(review): uniq compares the line objects themselves; whether that
      # detects duplicate titles depends on Brief::Line's equality -- confirm.
      if heading_lines.uniq.length < heading_lines.length
        raise HeadingsNotUnique, 'Headings inside a brief must be unique'
      end

      @validated = true
    end

    # Builds the Hash for +node+ and, recursively, for the headings directly
    # beneath it. &method(:tree_visitor) passes this method as the map block.
    def tree_visitor(node)
      children = headings_under(node, as_objects: true).map(&method(:tree_visitor))
      children.each_with_index { |child, index| child[:heading_index] = index }

      base = {
        id: "#{ checksum }_#{ node.sort_index.join('_') }",
        level: node.level,
        children: children,
        title: node.content,
        sort_index: node.sort_index,
        line_number: node.line_number
      }

      base.merge!(content: body_content_for(node).to_s.strip) if node.heading?
      base
    end

    # Memoized list of Hashie::Mash nodes, one per heading at the highest
    # level, each tagged with its position as :heading_index.
    def tree_nodes
      @tree_nodes ||= begin
        roots = headings_at_level(highest_level, as_objects: true)

        nodes = roots.each_with_index.map do |node, index|
          tree_visitor(node).tap { |element| element[:heading_index] = index }
        end

        nodes.map { |node| Hashie::Mash.new(node) }.flatten
      end
    end

    def elements
      @elements ||= tree.elements
    end

    def maximum_level
      elements.map(&:level).max
    end

    def line_at(number)
      parsed_lines.detect { |line| line.number == number }
    end

    # Position of +line+ within the parsed lines, or nil when absent.
    def index_of(line)
      parsed_lines.index(line)
    end

    # First heading positioned after +heading+. +options+ is accepted for
    # call compatibility and is not used.
    def next_heading_after(heading, options = {})
      heading_lines.detect { |line| line.number > heading.number }
    end

    # The smallest level number present among the headings.
    def highest_level
      level_boundaries.last
    end

    # The largest level number present among the headings.
    def lowest_level
      level_boundaries.first
    end

    # @return [Array] [largest level number, smallest level number]
    def level_boundaries
      levels = heading_lines.map(&:level).uniq
      [levels.max, levels.min]
    end

    def heading_lines
      parsed_lines.select(&:heading?)
    end

    # NOTE(review): callers in this class pass a 0-based index while the
    # code boundaries are built as marker + 1; this looks like it could be
    # off by one -- confirm against Brief::Line before changing.
    def is_line_inside_code_region?(line_number)
      code_boundaries.any? { |range| range.include?(line_number) }
    end

    # Every heading positioned after +heading_line+ (line object or title).
    def headings_after(heading_line)
      heading_line = resolve_heading(heading_line)
      heading_lines.select { |line| line.number > heading_line.number }
    end

    # Headings nested beneath +heading_line+, stopping at the first heading
    # of the same or a shallower level. Only direct children are returned
    # unless options[:all] is true. Returns line objects when
    # options[:as_objects] is true, otherwise their content.
    def headings_under(heading_line, options = {})
      include_subtree = options.fetch(:all, false)
      as_objects      = options.fetch(:as_objects, false)
      heading_line    = resolve_heading(heading_line)

      nested  = headings_after(heading_line).take_while { |line| line.level > heading_line.level }
      matches = nested.select do |line|
        include_subtree || line.level - heading_line.level <= 1
      end

      as_objects ? matches : matches.map(&:content)
    end

    # Content lines from +heading_line+ up to the next heading (or the last
    # line). Blank lines are dropped when options[:include_blank] is false.
    # Returns [] when the heading cannot be resolved.
    def content_lines_under(heading_line, options = {})
      reject_blank = !options.fetch(:include_blank, true)
      as_objects   = options.fetch(:as_objects, false)
      heading_line = resolve_heading(heading_line)

      return [] unless heading_line.respond_to?(:number)

      min = heading_line.number
      max = next_heading_after(heading_line).try(:number) || last_line_number

      matches = content_lines.select { |line| line.number.between?(min, max) }
      matches.reject! { |line| line.content.blank? } if reject_blank

      as_objects ? matches : matches.map(&:content)
    end

    def content_lines
      parsed_lines.select(&:content?)
    end

    def code_block_markers
      parsed_lines.select { |line| line.type == "code_block_marker" }
    end

    # Memoized lines of @content, line endings included.
    def raw_lines
      @raw_lines ||= @content.lines.to_a
    end

    # Drops the raw lines whose 0-based index lies within
    # start_index..end_index (inclusive).
    def remove_raw_lines(start_index, end_index)
      @raw_lines = raw_lines.reject.with_index do |_line, index|
        index.between?(start_index, end_index)
      end
    end

    def stripped_lines
      @stripped ||= raw_lines.map(&:strip)
    end

    def parsed_lines
      @parsed_lines
    end

    # Builds (and memoizes) a Brief::Line for every stripped line, attaching
    # the original raw text. Pass true to discard the memoized result first.
    def parse(clear = false)
      @parsed_lines = nil if clear
      return @parsed_lines if @parsed_lines

      raw = raw_lines

      @parsed_lines = stripped_lines.each_with_index.map do |text, index|
        is_code = code_boundaries.any? { |bounds| bounds.include?(index) }
        Brief::Line.new(text, index, is_code).tap { |line| line.raw = raw[index] }
      end
    end

    # Fenced code block bodies grouped by the language named on the opening
    # fence; blocks without a language are grouped under :text.
    # @return [Hash{Symbol => Array<String>}]
    def code_blocks_by_language
      return @code_samples if @code_samples

      sections = lines_between_boundaries(*Array(code_boundaries))

      @code_samples = sections.each_with_object({}) do |section, memo|
        marker   = section.first.split('```').last.to_s.strip
        language = marker.empty? ? :text : marker.to_sym
        # Drop the opening and closing fence lines.
        body = section[1...-1] || []

        # \A strips whitespace only at the very start of the block, leaving
        # the indentation of later lines intact.
        (memo[language] ||= []) << body.join("").sub(/\A\s+/, '')
      end
    end

    private

    # Looks a heading up by its content when given a String; any other
    # value is returned unchanged.
    def resolve_heading(heading_line)
      return heading_line unless heading_line.is_a?(String)

      heading_lines.detect { |heading| heading.content == heading_line }
    end

    # Pairs consecutive marker indexes into ranges shifted up by one.
    # Returns nil when markers is nil.
    def marker_ranges(markers)
      markers && markers.each_slice(2).map do |pair|
        Range.new(pair.first + 1, pair.last + 1)
      end
    end
  end
end