module Stamina class RegLang class CanonicalInfo SHORT_PREFIXES = begin algo = Stamina::Utils::Decorate.new(:short_prefix) algo.set_suppremum do |d0,d1| if (d0.nil? || d1.nil?) (d0 || d1) else d0.size <= d1.size ? d0 : d1 end end algo.set_propagate do |deco, edge| deco.dup << edge.symbol end algo end attr_reader :cdfa def initialize(lang) @cdfa = lang.to_cdfa end # Returns the short prefix of a state or an edge. def short_prefix(s_or_e) prefixes! s_or_e[:short_prefix] ||= begin s_or_e.source[:short_prefix] + [s_or_e.symbol] end end # Returns a positive suffix for `state` def positive_suffix(state) state[:positive_suffix] ||= find_suffix(state, true) end # Returns a negative suffix for `state` def negative_suffix(state) state[:negative_suffix] ||= find_suffix(state, false) end # # Returns the short prefixes of the language as a sample # def short_prefixes prefixes = Sample.new cdfa.each_state do |s| prefixes << InputString.new(short_prefix(s), s.accepting?) end prefixes end # # Returns the language kernel as a sample # def kernel kernel = Sample.new kernel << InputString.new([], cdfa.initial_state.accepting?) cdfa.each_edge do |e| kernel << InputString.new(short_prefix(e), e.target.accepting?) end kernel end # # Builds a characteristic sample # def characteristic_sample sample = Sample.new # at least one positive string should be found from # the initial state if pos = positive_suffix(cdfa.initial_state) sample << InputString.new(pos, true) else sample << InputString.new([], false) return sample end # condition 1: positive string for each element of the kernel cdfa.each_edge do |edge| pos = short_prefix(edge) + positive_suffix(edge.target) sample << InputString.new(pos, true, false) end # condition 2: pair-wise distinguising suffixes cdfa.each_state do |source| cdfa.each_edge do |edge| next if (target = edge.target) == source if suffix = distinguish(source, target) sign = cdfa.accepts?(suffix, source) sample << InputString.new(short_prefix(source) + suffix, sign) sample << InputString.new(short_prefix(edge) + suffix, !sign) end end end sample end private # Ensures that short prefixes of states are recognized def prefixes! unless defined?(@prefixes) SHORT_PREFIXES.execute(cdfa, nil, []) @prefixes = true end end def cross(xs, ys) xs.each{|x| ys.each{|y| yield(x,y)}} end # Distinguishes two states, returning a suffix which is accepted for one # and rejected by the other def distinguish(x, y) raise ArgumentError, "x and y should be different" if x == y build_distinguish_matrix[[x,y].sort] end def build_distinguish_matrix @diff_matrix ||= begin mat = {} # pairs to be explored to_explore = [] # start by marking accepting vs. non-accepting states acc, nonacc = cdfa.states.partition{|s| s.accepting?} cross(acc, nonacc) do |*pair| mat[pair.sort!] = [] to_explore << pair end # Visit each pair backwards while pair = to_explore.pop suffix = mat[pair] cross(pair[0].in_edges, pair[1].in_edges) do |se, te| next if se.symbol != te.symbol source = [se.source, te.source].sort! if mat[source].nil? || (mat[source].length > (1+suffix.length)) mat[source] = [se.symbol] + suffix to_explore.push(source) end end end mat end end # Recursively finds a positive/negative suffix for `state` def find_suffix(state, positive, stack = [], seen = {}) if positive == state.accepting? # (pos and accepting) or (neg and non-accepting) => lambda stack elsif found = state.out_edges.find{|e| positive == e.target.accepting?} # at one step => augment stack with symbol stack << found.symbol elsif found = state.out_edges.find{|e| !seen.has_key?(e.target)} # recurse on a neighbour if you find one seen[state] = true find_suffix(found.target, positive, stack << found.symbol, seen) elsif !positive # in case of negative suffix: pick one in alphabet outs = state.out_symbols found = state.automaton.alphabet.find{|s| !outs.include?(s)} found ? (stack << found) : nil else # unable to find a suffix :-( nil end end end # class CanonicalInfo end # class RegLang end # module Stamina