require 'tap/support/schema'
autoload(:Shellwords, 'shellwords')
module Tap
module Support
# == Syntax
#
# ==== Round Assignment
# Tasks can be defined and set to a round using the following:
#
# break assigns task(s) to round
# -- next 0
# --+ next 1
# --++ next 2
# --+2 next 2
# --+2[1,2,3] 1,2,3 2
#
# Here all task (except c) are parsed into round 0, then the
# final argument reassigns e to round 3.
#
# schema = Parser.new("a -- b --+ c -- d -- e --+3[4]").schema
# schema.rounds(true) # => [[0,1,3],[2], nil, [4]]
#
# ==== Workflow Assignment
# All simple workflow patterns except switch can be specified within
# the parse syntax (switch is the exception because there is no good
# way to define the switch block).
#
# break pattern source(s) target(s)
# --: sequence last next
# --[] fork last next
# --{} merge next last
# --() sync_merge next last
#
# example meaning
# --1:2 1.sequence(2)
# --1:2:3 1.sequence(2,3)
# --:2: last.sequence(2,next)
# --[] last.fork(next)
# --1{2,3,4} 1.merge(2,3,4)
# --(2,3,4) last.sync_merge(2,3,4)
#
# Note how all of the bracketed styles behave similarly; they are
# parsed with essentially the same code, but reverse the source
# and target in the case of merges.
#
# Here a and b are sequenced inline. Task c is assigned to no
# workflow until the final argument which sequenced b and c.
#
# schema = Parser.new("a --: b -- c --1:2i").schema
# schema.argvs # => [["a"], ["b"], ["c"], []]
# schema.joins(true) # => [[:sequence,0,[1],{}], [:sequence,1,[2],{:iterate => true}]]
#
# ==== Globals
# Global instances of task (used, for example, by dependencies) may
# be assigned in the parse syntax as well. The break for a global
# is '--*'.
#
# schema = Parser.new("a -- b --* global_name --config for --global").schema
# schema.globals(true) # => [2]
#
# ==== Escapes and End Flags
# Breaks can be escaped by enclosing them in '-.' and '.-' delimiters;
# any number of arguments may be enclosed within the escape. After the
# end delimiter, breaks are active once again.
#
# schema = Parser.new("a -- b -- c").schema
# schema.argvs # => [["a"], ["b"], ["c"]]
#
# schema = Parser.new("a -. -- b .- -- c").schema
# schema.argvs # => [["a", "--", "b"], ["c"]]
#
# Parsing continues until the end of argv, or a an end flag '---' is
# reached. The end flag may also be escaped.
#
# schema = Parser.new("a -- b --- c").schema
# schema.argvs # => [["a"], ["b"]]
#
class Parser
# A set of parsing routines used internally by Tap::Support::Parser,
# modularized for ease of testing, and potential re-use. These methods
# require that current_index and previous_index be
# implemented in the including class.
module Utils
module_function
# Defines a break regexp that matches a bracketed-pairs
# break. The left and right brackets are specified as
# inputs. After a match:
#
# $1:: The source string after the break.
# (ex: '[]' => '', '1[]' => '1')
# $2:: The target string.
# (ex: '[]' => '', '1[1,2,3]' => '1,2,3')
# $3:: The modifier string.
# (ex: '[]i' => 'i', '1[1,2,3]is' => 'is')
#
def bracket_regexp(l, r)
/\A(\d*)#{Regexp.escape(l)}([\d,]*)#{Regexp.escape(r)}([A-z]*)\z/
end
# The escape begin argument
ESCAPE_BEGIN = "-."
# The escape end argument
ESCAPE_END = ".-"
# The parser end flag
END_FLAG = "---"
# Matches any breaking arg (ex: '--', '--+', '--1:2')
# After the match:
#
# $1:: The string after the break
# (ex: '--' => '', '--++' => '++', '--1(2,3)' => '1(2,3)')
#
BREAK = /\A--(\z|[\+\d\:\*\[\{\(].*\z)/
# Matches an execution-round break. After the match:
#
# $2:: The round string, or nil.
# (ex: '' => nil, '++' => '++', '+1' => '+1')
# $5:: The target string, or nil.
# (ex: '+' => nil, '+[1,2,3]' => '1,2,3')
#
ROUND = /\A((\+(\d*|\+*))(\[([\d,]*)\])?)?\z/
# Matches a sequence break. After the match:
#
# $1:: The sequence string after the break.
# (ex: ':' => ':', '1:2' => '1:2', '1:' => '1:', ':2' => ':2')
# $3:: The modifier string.
# (ex: ':i' => 'i', '1:2is' => 'is')
#
SEQUENCE = /\A(\d*(:\d*)+)([A-z]*)\z/
# Matches an instance break. After the match:
#
# $1:: The index string after the break.
# (ex: '*' => '', '*1' => '1')
#
INSTANCE = /\A\*(\d*)\z/
# A break regexp using "[]"
FORK = bracket_regexp("[", "]")
# A break regexp using "{}"
MERGE = bracket_regexp("{", "}")
# A break regexp using "()"
SYNC_MERGE = bracket_regexp("(", ")")
# Parses an indicies str along commas, and collects the indicies
# as integers. Ex:
#
# parse_indicies('') # => []
# parse_indicies('1') # => [1]
# parse_indicies('1,2,3') # => [1,2,3]
#
def parse_indicies(str, regexp=/,+/)
indicies = []
str.split(regexp).each do |n|
indicies << n.to_i unless n.empty?
end
indicies
end
# Parses the match of a ROUND regexp into a round index
# and an array of task indicies that should be added to the
# round. The inputs correspond to $2 and $5 for the match.
#
# If $2 is nil, a round index of zero is assumed; if $5 is
# nil or empty, then indicies of [:current_index] are assumed.
#
# parse_round("+", "") # => [1, [:current_index]]
# parse_round("+2", "1,2,3") # => [2, [1,2,3]]
# parse_round(nil, nil) # => [0, [:current_index]]
#
def parse_round(two, five)
index = case two
when nil then 0
when /\d/ then two[1, two.length-1].to_i
else two.length
end
[index, five.to_s.empty? ? [current_index] : parse_indicies(five)]
end
# Parses the match of a SEQUENCE regexp into an [indicies, options]
# array. The inputs corresponds to $1 and $3 for the match. The
# previous and current index are assumed if $1 starts and/or ends
# with a semi-colon.
#
# parse_sequence("1:2:3", '') # => [[1,2,3], {}]
# parse_sequence(":1:2:", '') # => [[:previous_index,1,2,:current_index], {}]
#
def parse_sequence(one, three)
seq = parse_indicies(one, /:+/)
seq.unshift previous_index if one[0] == ?:
seq << current_index if one[-1] == ?:
[seq, parse_options(three)]
end
# Parses the match of an INSTANCE regexp into an index.
# The input corresponds to $1 for the match. The current
# index is assumed if $1 is empty.
#
# parse_instance("1") # => 1
# parse_instance("") # => :current_index
#
def parse_instance(one)
one.empty? ? current_index : one.to_i
end
# Parses the match of an bracket_regexp into a [source_index,
# target_indicies, options] array. The inputs corresponds to $1,
# $2, and $3 for the match. The previous and current index are
# assumed if $1 and/or $2 is empty.
#
# parse_bracket("1", "2,3", "") # => [1, [2,3], {}]
# parse_bracket("", "", "") # => [:previous_index, [:current_index], {}]
# parse_bracket("1", "", "") # => [1, [:current_index], {}]
# parse_bracket("", "2,3", "") # => [:previous_index, [2,3], {}]
#
def parse_bracket(one, two, three)
targets = parse_indicies(two)
targets << current_index if targets.empty?
[one.empty? ? previous_index : one.to_i, targets, parse_options(three)]
end
# Parses an options string into a hash. The input corresponds
# to $3 in a SEQUENCE or bracket_regexp match. Raises an error
# if the options string contains unknown options.
#
# parse_options("") # => {}
# parse_options("is") # => {:iterate => true, :stack => true}
#
def parse_options(three)
options = {}
0.upto(three.length - 1) do |char_index|
char = three[char_index, 1]
unless index = Join::SHORT_FLAGS.index(char)
raise "unknown option in: #{three} (#{char})"
end
options[Join::FLAGS[index]] = true
end
options
end
# Parses an arg hash into a schema argv. An arg hash is a hash
# using numeric keys to specify the [row][col] in a two-dimensional
# array where a set of values should go. Breaks are added between
# rows (if necessary) and the array is collapsed to yield the
# argv:
#
# argh = {
# 0 => {
# 0 => 'a',
# 1 => ['b', 'c']},
# 1 => 'z'
# }
# parse_argh(argh) # => ['--', 'a', 'b', 'c', '--', 'z']
#
# Non-numeric keys are converted to integers using to_i and
# existing breaks (such as workflow breaks) occuring at the
# start of a row are preseved.
#
# argh = {
# '0' => {
# '0' => 'a',
# '1' => ['b', 'c']},
# '1' => ['--:', 'z']
# }
# parse_argh(argh) # => ['--', 'a', 'b', 'c', '--:', 'z']
#
def parse_argh(argh)
rows = []
argh.each_pair do |row, values|
if values.kind_of?(Hash)
arry = []
values.each_pair {|col, value| arry[col.to_i] = value }
values = arry
end
rows[row.to_i] = values
end
argv = []
rows.each do |row|
row = [row].flatten.compact
if row.empty? || row[0] !~ BREAK
argv << '--'
end
argv.concat row
end
argv
end
end
include Utils
# The schema into which nodes are parsed
attr_reader :schema
def initialize(argv=[])
@current_index = 0
@schema = Schema.new
parse(argv)
end
# Iterates through the argv splitting out task and workflow definitions.
# Task definitions are split out (with configurations) along round and/or
# workflow break lines. Rounds and workflows are dynamically parsed;
# tasks may be reassigned to different rounds or workflows by later
# arguments.
#
# Parse is non-destructive to argv. If a string argv is provided, parse
# splits it into an array using Shellwords; if a hash argv is provided,
# parse converts it to an array using Parser::Utils#parse_argh.
#
def parse(argv)
parse!(argv.kind_of?(String) ? argv : argv.dup)
end
# Same as parse, but removes parsed args from argv.
def parse!(argv)
argv = case argv
when Array then argv
when String then Shellwords.shellwords(argv)
when Hash then parse_argh(argv)
else argv
end
argv.unshift('--')
escape = false
current_argv = schema[current_index].argv
while !argv.empty?
arg = argv.shift
# if escaping, add escaped arguments
# until an escape-end argument
if escape
if arg == ESCAPE_END
escape = false
else
current_argv << arg
end
next
end
case arg
when ESCAPE_BEGIN
# begin escaping if indicated
escape = true
when END_FLAG
# break on an end-flag
break
when BREAK
# a breaking argument was reached:
# unless the current argv is empty,
# append and start a new definition
unless current_argv.empty?
self.current_index += 1
current_argv = schema[current_index].argv
end
# parse the break string for any
# schema modifications
parse_break($1)
else
# add all other non-breaking args to
# the current argv; this includes
# both inputs and configurations
current_argv << arg
end
end
schema
end
def load(argv)
argv.each do |arg|
case arg
when Array
schema.nodes << arg
self.current_index += 1
else
parse_break(arg)
end
end
schema
end
protected
# The index of the node currently being parsed.
attr_accessor :current_index # :nodoc:
# Returns current_index-1, or raises an error if current_index < 1.
def previous_index # :nodoc:
raise ArgumentError, 'there is no previous index' if current_index < 1
current_index - 1
end
# determines the type of break and modifies self appropriately
def parse_break(arg) # :nodoc:
case arg
when ROUND
round, indicies = parse_round($2, $5)
indicies.each {|index| schema[index].round = round }
when SEQUENCE
indicies, options = parse_sequence($1, $3)
while indicies.length > 1
schema.set(Joins::Sequence, indicies.shift, indicies[0], options)
end
when INSTANCE then schema[parse_instance($1)].globalize
when FORK then schema.set(Joins::Fork, *parse_bracket($1, $2, $3))
when MERGE then schema.set(Joins::Merge, *parse_bracket($1, $2, $3))
when SYNC_MERGE then schema.set(Joins::SyncMerge, *parse_bracket($1, $2, $3))
else raise ArgumentError, "invalid break argument: #{arg}"
end
end
end
end
end