module Stamina
#
# Automaton Description Language module. This module provides parsing and
# printing methods for automata and samples. Documentation of the file format
# used for an automaton is given in parse_automaton; file format for samples is
# documented in parse_sample.
#
# Methods of this module are not intended to be included by a class but invoked
# on the module instead:
#
# begin
# dfa = Stamina::ADL.parse_automaton_file("my_automaton.adl")
# rescue ADL::ParseError => ex
# puts "Oops, the ADL automaton file seems corrupted..."
# end
#
# == Detailed API
module ADL
#################################################################################
# Automaton Section #
#################################################################################
#
# Parses a given automaton description and returns an Automaton instance.
#
# Raises:
# - ArgumentError unless _descr_ is an IO object or a String.
# - ADL::ParseError if the ADL automaton format is not respected.
#
# ADL provides a really simple grammar to describe automata. Here is a succint
# example (full documentation of the ADL automaton grammar can be found in
# the self-documenting example/adl/automaton.adl file).
#
# # Some header comments: tool which has generated this automaton,
# # maybe a date or other tool options ...
# # here: 'this automaton accepts the a(ba)* regular language'
# 2 2
# 0 true false
# 1 false true
# 0 1 a
# 1 0 b
#
def self.parse_automaton(descr)
automaton = nil
ADL::to_io(descr) do |io|
state_count, edge_count = nil, nil
state_read, edge_read = 0, 0
states = {}
mode = :header
automaton = Automaton.new do |fa|
# parse each description line
line_number = 1
io.each_line do |l|
index = l.index('#')
l = l[0,index] if index
l = l.strip
next if l.empty? or l[0,1]=='#'
case mode
when :header
# looking for |state_count edge_count|
raise(ADL::ParseError,
"Parse error line #{line_number}: 'state_count edge_count' expected, "\
"'#{l}' found.") unless /^(\d+)\s+(\d+)$/ =~ l
state_count, edge_count = $1.to_i, $2.to_i
mode = :states
when :states
# looking for |number initial accepting|
raise(ADL::ParseError,
"Parse error line #{line_number}: state definition expected, "\
"'#{l}' found.") unless /^(\S+)\s+(true|false)\s+(true|false)(\s+(true|false))?$/ =~ l
id, initial, accepting, error = $1, $2, $3, $5
initial, accepting, error = ("true"==initial), ("true"==accepting), ("true"==error)
state = fa.add_state(:initial => initial, :accepting => accepting, :error => error)
state[:name]=id.to_s
states[id] = state
state_read += 1
mode = (edge_count==0 ? :end : :edges) if state_read==state_count
when :edges
# looking for |source target symbol|
raise(ADL::ParseError,
"Parse error line #{line_number}: edge definition expected, "\
"'#{l}' found.") unless /^(\S+)\s+(\S+)\s+(\S+)$/ =~ l
source, target, symbol = $1, $2, $3
raise(ADL::ParseError,
"Parse error line #{line_number}: no such state #{source}") \
unless states[source]
raise(ADL::ParseError,
"Parse error line #{line_number}: no such state #{target}") \
unless states[target]
fa.connect(states[source], states[target], {:symbol => symbol})
edge_read += 1
mode = :end if edge_read==edge_count
when :end
raise(ADL::ParseError,
"Parse error line #{line_number}: trailing data found '#{l}")
end # case mode
line_number += 1
end
raise(ADL::ParseError, "Parse error: #{state_count} states annouced, "\
"#{state_read} found.") if state_count != state_read
raise(ADL::ParseError, "Parse error: #{edge_count} edges annouced, "\
"#{edge_read} found.") if edge_count != edge_read
end # Automaton.new
end
return automaton
end # def self.parse
#
# Parses an automaton file _f_.
#
# Shortcut for:
# File.open(f, 'r') do |io|
# Stamina::ADL.parse_automaton(io)
# end
#
def self.parse_automaton_file(f)
automaton = nil
File.open(f) do |file|
automaton = ADL::parse_automaton(file)
end
automaton
end
#
# Prints an automaton to a buffer (responding to :<<
) in ADL
# format. Returns the buffer itself.
#
def self.print_automaton(fa, buffer="")
buffer << "#{fa.state_count.to_s} #{fa.edge_count.to_s}" << "\n"
fa.states.each do |s|
buffer << "#{s.index.to_s} #{s.initial?} #{s.accepting?}" << (s.error? ? " true" : "") << "\n"
end
fa.edges.each do |e|
buffer << "#{e.source.index.to_s} #{e.target.index.to_s} #{e.symbol.to_s}" << "\n"
end
buffer
end
#
# Prints an automaton to a file whose path is provided.
#
# Shortcut for:
# File.open(file, 'w') do |io|
# print_automaton(fa, io)
# end
#
def self.print_automaton_to_file(fa, file)
File.open(file, 'w') do |io|
print_automaton(fa, io)
end
end
#################################################################################
# String and Sample Section #
#################################################################################
#
# Parses an input string _str_ and returns a InputString instance. Format of
# input strings is documented in parse_sample. _str_ is required to be a ruby
# String.
#
# Raises:
# - ADL::ParseError if the ADL string format is not respected.
#
def self.parse_string(str)
symbols = str.split(' ')
case symbols[0]
when '+'
symbols.shift
InputString.new symbols, true, false
when '-'
symbols.shift
InputString.new symbols, false, false
when '?'
symbols.shift
InputString.new symbols, nil, false
else
raise ADL::ParseError, "Invalid string format #{str}", caller
end
end
#
# Parses the sample provided by _descr_. When a block is provided, yields it with
# InputString instances and ignores the sample argument. Otherwise, fills the sample
# (any object responding to <<
) with string, creating a fresh new
# one (as a Sample instance) if sample is nil.
#
# ADL provides a really simple grammar to describe samples (here is a succint
# example, the full documentation of the sample grammar can be found in the
# self-documenting example/adl/sample.adl file):
#
# #
# # Some header comments: tool which has generated this sample,
# # maybe a date or other tool options ...
# # here: 'this sample is caracteristic for the a(ba)* regular language'
# #
# # Positive, Negative, Unlabeled strings become with +, -, ?, respectively
# # Empty lines and lines becoming with # are simply ignored.
# #
# -
# + a
# - a b
# + a b a
#
# Raises:
# - ArgumentError unless _descr_ argument is an IO object or a String.
# - ADL::ParseError if the ADL sample format is not respected.
# - InconsistencyError if the sample is not consistent (see Sample)
#
def self.parse_sample(descr, sample=nil)
sample = Sample.new if (sample.nil? and not block_given?)
ADL::to_io(descr) do |io|
io.each_line do |l|
l = l.strip
next if l.empty? or l[0,1]=='#'
if sample.nil? and block_given?
yield parse_string(l)
else
sample << parse_string(l)
end
end
end
sample
end
#
# Parses an automaton file _f_.
#
# Shortuct for:
# File.open(f) do |file|
# sample = ADL::parse_sample(file, sample)
# end
#
def self.parse_sample_file(f, sample=nil)
File.open(f) do |file|
sample = ADL::parse_sample(file, sample)
end
sample
end
#
# Prints a sample in ADL format on a buffer. Sample argument is expected to be
# an object responding to each, yielding InputString instances. Buffer is expected
# to be an object responding to <<
.
#
def self.print_sample(sample, buffer="")
sample.each do |str|
buffer << str.to_s << "\n"
end
end
#
# Prints a sample in a file.
#
# Shortcut for:
# File.open(file, 'w') do |io|
# print_sample(sample, f)
# end
#
def self.print_sample_in_file(sample, file)
File.open(file, 'w') do |f|
print_sample(sample, f)
end
end
### private section ##########################################################
private
#
# Converts a parsable argument to an IO object or raises an ArgumentError.
#
def self.to_io(descr)
case descr
when IO
yield descr
when String
yield StringIO.new(descr)
else
raise ArgumentError, "IO instance expected, #{descr.class} received", caller
end
end
end # module ADL
end # module Stamina