module Todoloo
  # Parslet grammar that finds task annotations (see TASK_TYPES) inside
  # `#`-style comments and turns them into Task objects.
  #
  # Read https://kschiess.github.io/parslet/get-started.html
  class Parser < Parslet::Parser
    # Keywords that open a task inside a comment.
    TASK_TYPES = %w[TODO NOTE FIXME HBD HACK XXX].freeze

    # Extra columns, beyond the column where the task type keyword started,
    # that a continuation line has to be indented by to count as part of a
    # multiline description.
    MIN_INDENTATION_BEYOND_TYPE_START = 1

    root :file

    # Parses +text+ and builds one Task per hash produced by the Transformer.
    #
    # @param text [String] the source to scan
    # @param path [String] merged into every task hash under the :path key
    # @return [Array] whatever Task.from_hash returns, one entry per task hash
    # @raise [Parslet::ParseFailed] when +text+ does not match the grammar
    def parse_and_transform(text, path: "")
      tree = parse(text)
      pp tree if Todoloo.debug?

      Transformer.new.apply(tree).map do |hash|
        # The :type value carries its own source position; reuse it as the
        # position of the whole task.
        line, column = hash.fetch(:type).line_and_column
        Task.from_hash(hash.merge(path: path, line: line, column: column))
      end
    end

    # Like #parse, but never raises on a grammar mismatch: the failure cause
    # is printed as an ASCII tree and nil is returned instead.
    #
    # @param text [String] the source to scan
    # @return [Object, nil] the parse tree, or nil when parsing failed
    def safe_parse(text)
      parse(text)
    rescue Parslet::ParseFailed => e
      puts e.parse_failure_cause.ascii_tree
      nil
    end

    # Extension to automatically define `?` predicate rules.
    #
    # Every `rule(:foo)` also defines `foo?`, which is `foo.maybe`. Rules whose
    # name already ends in `?` are defined as given, without a companion.
    # The bare `super` calls forward name, args, kwargs and the block as-is.
    def self.rule(name, *args, **kwargs, &block)
      if name.to_s.end_with?("?")
        super
      else
        super # Define the original rule
        super("#{name}?") { public_send(name).maybe }
      end
    end

    # Single char rules
    rule(:lparen) { str("(") }
    rule(:rparen) { str(")") }
    rule(:eof) { any.absent? }
    rule(:eol) { str("\n") }
    rule(:space) { match(/[ \t]/) }
    rule(:code_text) { match(/[^#\n]/) }

    # FIXME: Cannot handle JS comments yet
    rule(:comment_char) { str("#") } # || str("//") }

    # Can read all the way without honoring anything special,
    # since its use will only be when the comment has already started
    rule(:comment_text) { match(/[^\n]/) }

    # Parts
    rule(:spacing) { space.repeat(1) }
    rule(:code) { code_text.repeat(1).as(:code) }
    rule(:line) { (comment | code >> comment | code).as(:line) }
    rule(:file) { (line >> eol | eol | line >> eof).repeat.as(:file) }
    rule(:comment) { (comment_start >> (task.as(:task) | line).maybe).as(:comment) }

    # The opener is captured so that comment_continuation can later look up
    # both its text and the column it started at.
    rule(:comment_start) do
      comment_char.capture(:comment_start) >> spacing?
    end

    # A task: type keyword, then optional topics, optional `:` separator,
    # optional spacing and an optional description.
    #
    # examples
    #   TODO(topic): My task
    #   TODO(topic1, topic2): My task
    #   TODO: My task
    #   TODO My task
    rule(:task) do
      task_type.as(:type).capture(:type) >>
        topics.repeat(0, 1).as(:topics) >>
        task_separator? >>
        spacing? >>
        description.repeat(0, 1).as(:description)
    end

    # The multiline form is tried first; it only matches when at least one
    # continuation line follows the first line of text.
    rule(:description) { multiline_description | single_line_description }
    rule(:description_text) { comment_text.repeat(1) }
    rule(:single_line_description) { description_text.as(:text) }

    rule(:multiline_description) do
      (description_text >> comment_continuation.repeat(1)).as(:text)
    end

    # Matches one continuation line of a multiline task description.
    #
    # Built dynamically because what counts as a continuation depends on the
    # captures made earlier in the same parse: the comment opener (its text and
    # column) and the column of the task type keyword.
    #
    # A continuation line consists of, in order:
    #   1. a newline,
    #   2. as many non-comment characters as preceded the original opener,
    #   3. the same comment opener,
    #   4. NOT (optional spacing followed by a task type) - that would be a
    #      new task rather than a continuation,
    #   5. enough spaces to land MIN_INDENTATION_BEYOND_TYPE_START columns past
    #      the column where the task type started,
    #   6. the description text itself.
    def comment_continuation
      dynamic do |_source, context|
        opener = context.captures[:comment_start]
        comment_start = column_offset_of_capture(:comment_start, context)

        parser = match('[\n]')

        # Match any non comment content before the comment starts
        parser = ignore_code_text(parser, comment_start)

        # Consume the character that the comment opened with, like a `#` or `//`
        parser >>= str(opener).ignore

        # A line that starts another task must not be swallowed as continuation
        parser >>= (spacing? >> task_type).absent?

        # Spaces needed after the opener: the distance between the end of the
        # opener and the start of the task type, plus the minimum extra indent.
        required_indentation =
          column_offset_of_capture(:type, context) -
          comment_start -
          opener.length +
          MIN_INDENTATION_BEYOND_TYPE_START

        parser = indentation(parser, required_indentation)
        parser >>= description_text
        parser
      end
    end

    # Calculates zero-based column offset for the given capture.
    # Use only within dynamic blocks
    #
    # @param name [Symbol] capture name, :type or :comment_start in this grammar
    # @param context the Parslet context handed to a `dynamic` block
    # @return [Integer] zero-based column at which the capture started
    def column_offset_of_capture(name, context)
      case name
      when :type
        # NOTE(review): the extra [:type] lookup is most likely needed because
        # the :type capture is taken on `task_type.as(:type)`, so the stored
        # value is the named result `{type: slice}` rather than a bare slice.
        _, col_index = context.captures[:type][:type].line_and_column
      else
        _, col_index = context.captures[name].line_and_column
      end

      # line_and_column is one-based; callers want an offset.
      col_index - 1
    end

    # Appends a parser that consumes exactly +count+ code characters and
    # discards them. Returns +parser+ untouched when +count+ is not positive.
    def ignore_code_text(parser, count)
      return parser unless count.positive?

      parser >> code_text.repeat(count, count).ignore
    end

    # Appends a parser that requires exactly +count+ space characters.
    # Returns +parser+ untouched when +count+ is not positive.
    def indentation(parser, count)
      return parser unless count.positive?

      parser >> space.repeat(count, count)
    end

    # Alternation over every keyword in TASK_TYPES, tried in list order.
    rule(:task_type) do
      TASK_TYPES.map { |type| str(type) }.reduce(:|)
    end

    rule(:topic) { match(/[^),]/).repeat(1).as(:topic) }

    # At most one additional comma-separated topic is accepted.
    rule(:topic_rest) { (str(",") >> spacing? >> topic).repeat(0, 1) }

    # (topic1, topic2)
    rule(:topics) { lparen >> topic >> topic_rest >> rparen }

    # `task_separator?` is generated automatically by the `rule` override.
    rule(:task_separator) { str(":") }
  end
end