lib/Generator.rb in re_expand-0.0.1 vs lib/Generator.rb in re_expand-0.0.2

- old
+ new

@@ -15,219 +15,223 @@ require 'Scanner' require 'Node' require 'Asearch' -class GenNode - def initialize(id, state=[], s="", substrings=[], accept=false) - @id = id - @s = s - @substrings = substrings - @accept = accept - @state = state +module ReExpand + class GenNode + def initialize(id, state=[], s="", substrings=[], accept=false) + @id = id + @s = s + @substrings = substrings + @accept = accept + @state = state + end + + attr :id, true + attr :s, true + attr :substrings, true + attr :accept, true + attr :state, true end - - attr :id, true - attr :s, true - attr :substrings, true - attr :accept, true - attr :state, true -end - -class Generator - def initialize(s = nil) - @s = (s ? [s] : []) - @matchedlist = [] - @par = 0 - @commands = [] - end - - def add(pat,command) - @s << pat - @commands << command - end - - def delete - @s.pop - @commands.pop - end - - # - # ルールを解析して状態遷移機械を作成し、patにマッチするもののリストを返す - # - def generate(pat, app = nil) - res = [[],[],[]] # 曖昧度0,1,2のマッチ結果 - patterns = pat.split.map { |p| p.downcase } - - @asearch = Asearch.new(pat) - scanner = Scanner.new(@s.join('|')) - - # HelpDataで指定した状態遷移機械全体を生成 - # (少し時間がかかる) - (startnode, endnode) = regexp(scanner,true) # top level - + + class Generator + def initialize(s = nil) + @s = (s ? [s] : []) + @matchedlist = [] + @par = 0 + @commands = [] + end + + def add(pat,command) + @s << pat + @commands << command + end + + def delete + @s.pop + @commands.pop + end + # - # 状態遷移機械からDepth-Firstで文字列を生成する - # n個のノードを経由して生成される状態の集合をlists[n]に入れる - # 生成しながらマッチングも計算する + # ルールを解析して状態遷移機械を作成し、patにマッチするもののリストを返す # - lists = [] - listed = [{},{},{}] - # - # 初期状態 - # - list = [] - list[0] = GenNode.new(startnode.id, @asearch.initstate) - lists[0] = list - # - (0..1000).each { |length| - break if app && app.inputPending - list = lists[length] - newlist = [] - # puts "#{length} - #{list.length}" - list.each { |entry| - srcnode = Node.node(entry.id) - if list.length * srcnode.trans.length < 10000 then - srcnode.trans.each { |trans| - ss = entry.substrings.dup - srcnode.pars.each { |i| - ss[i-1] = ss[i-1].to_s + trans.arg - } - newstate = @asearch.state(entry.state, trans.str) # 新しいマッチング状態を計算してノードに保存 - s = entry.s + trans.str - acceptno = trans.dest.accept - newlist << GenNode.new(trans.dest.id, newstate, s, ss, acceptno) - # - # この時点で、マッチしているかどうかをstateとacceptpatで判断できる - # マッチしてたら出力リストに加える - # - if acceptno then - maxambig = 2 - (0..maxambig).each { |ambig| - if !listed[ambig][s] then - if (newstate[ambig] & @asearch.acceptpat) != 0 then # マッチ - maxambig = ambig if ambig < maxambig # 曖昧度0でマッチすれば曖昧度1の検索は不要 - listed[ambig][s] = true - sslen = ss.length - if sslen > 0 then - # patstr = "(.*)\t" * (sslen-1) + "(.*)" - patstr = (["(.*)"] * sslen).join("\t") - /#{patstr}/ =~ ss.join("\t") + def generate(pat, app = nil) + res = [[],[],[]] # 曖昧度0,1,2のマッチ結果 + patterns = pat.split.map { |p| p.downcase } + + @asearch = Asearch.new(pat) + scanner = Scanner.new(@s.join('|')) + + # HelpDataで指定した状態遷移機械全体を生成 + # (少し時間がかかる) + (startnode, endnode) = regexp(scanner,true) # top level + + # + # 状態遷移機械からDepth-Firstで文字列を生成する + # n個のノードを経由して生成される状態の集合をlists[n]に入れる + # 生成しながらマッチングも計算する + # + lists = [] + listed = [{},{},{}] + # + # 初期状態 + # + list = [] + list[0] = GenNode.new(startnode.id, @asearch.initstate) + lists[0] = list + # + (0..1000).each { |length| + break if app && app.inputPending + list = lists[length] + newlist = [] + # puts "#{length} - #{list.length}" + list.each { |entry| + srcnode = Node.node(entry.id) + if list.length * srcnode.trans.length < 10000 then + srcnode.trans.each { |trans| + ss = entry.substrings.dup + srcnode.pars.each { |i| + ss[i-1] = ss[i-1].to_s + trans.arg + } + newstate = @asearch.state(entry.state, trans.str) # 新しいマッチング状態を計算してノードに保存 + s = entry.s + trans.str + acceptno = trans.dest.accept + newlist << GenNode.new(trans.dest.id, newstate, s, ss, acceptno) + # + # この時点で、マッチしているかどうかをstateとacceptpatで判断できる + # マッチしてたら出力リストに加える + # + if acceptno then + maxambig = 2 + (0..maxambig).each { |ambig| + if !listed[ambig][s] then + if (newstate[ambig] & @asearch.acceptpat) != 0 then # マッチ + maxambig = ambig if ambig < maxambig # 曖昧度0でマッチすれば曖昧度1の検索は不要 + listed[ambig][s] = true + sslen = ss.length + if sslen > 0 then + # patstr = "(.*)\t" * (sslen-1) + "(.*)" + patstr = (["(.*)"] * sslen).join("\t") + /#{patstr}/ =~ ss.join("\t") + end + ## next if $1 == $2 + # 'set date #{$2}' のような記述の$変数にsubstringの値を代入 + File.open("/tmp/log","a"){ |f| + f.puts "#{s}-----" + eval('%('+@commands[acceptno]+')') + } + res[ambig] << [s, eval('%('+@commands[acceptno]+')')] end -## next if $1 == $2 - # 'set date #{$2}' のような記述の$変数にsubstringの値を代入 - File.open("/tmp/log","a"){ |f| - f.puts "#{s}-----" + eval('%('+@commands[acceptno]+')') - } - res[ambig] << [s, eval('%('+@commands[acceptno]+')')] end - end - } - end - } - end + } + end + } + end + } + break if newlist.length == 0 + lists << newlist + break if res[0].length > 100 } - break if newlist.length == 0 - lists << newlist - break if res[0].length > 100 - } - [res[0], res[1], res[2]] - end - - # - # 正規表現をパースして状態遷移機械を作る - # - private - # n1 n2 - # +-->□.....□--+ - # start / \ end - # □ --->□.....□---> □ - # \ / - # +-->□.....□--+ - # - def regexp(s,toplevel=false) # regcat { '|' regcat } - startnode = Node.new - endnode = Node.new - if toplevel then - @pars = [] - @parno = 0 - @ruleid = 0 + [res[0], res[1], res[2]] end - startnode.pars = @pars - endnode.pars = @pars - (n1, n2) = regcat(s) - startnode.addTrans('',n1) - if toplevel then - n2.accept = @ruleid - end - n2.addTrans('',endnode) - while s.gettoken == '|' && s.nexttoken != '' do + + # + # 正規表現をパースして状態遷移機械を作る + # + private + # n1 n2 + # +-->□.....□--+ + # start / \ end + # □ --->□.....□---> □ + # \ / + # +-->□.....□--+ + # + def regexp(s,toplevel=false) # regcat { '|' regcat } + startnode = Node.new + endnode = Node.new if toplevel then @pars = [] @parno = 0 - @ruleid += 1 + @ruleid = 0 end + startnode.pars = @pars + endnode.pars = @pars (n1, n2) = regcat(s) startnode.addTrans('',n1) if toplevel then n2.accept = @ruleid end n2.addTrans('',endnode) - end - s.ungettoken - return [startnode, endnode] - end - - def regcat(s) # regfactor { regfactor } - (startnode, endnode) = regfactor(s) - while s.gettoken !~ /^[\)\]\|]$/ && s.nexttoken != '' do + while s.gettoken == '|' && s.nexttoken != '' do + if toplevel then + @pars = [] + @parno = 0 + @ruleid += 1 + end + (n1, n2) = regcat(s) + startnode.addTrans('',n1) + if toplevel then + n2.accept = @ruleid + end + n2.addTrans('',endnode) + end s.ungettoken - (n1, n2) = regfactor(s) - endnode.addTrans('',n1) - endnode = n2 + return [startnode, endnode] end - s.ungettoken - return [startnode, endnode] - end - - def regfactor(s) # regterm [ '?' | '+' | '*' ] - (startnode, endnode) = regterm(s) - t = s.gettoken - if t =~ /^[\?]$/ then - startnode.addTrans('',endnode) - elsif t =~ /^[\+]$/ then - endnode.addTrans('',startnode) - elsif t =~ /^[\*]$/ then - startnode.addTrans('',endnode) - endnode.addTrans('',startnode) - else + + def regcat(s) # regfactor { regfactor } + (startnode, endnode) = regfactor(s) + while s.gettoken !~ /^[\)\]\|]$/ && s.nexttoken != '' do + s.ungettoken + (n1, n2) = regfactor(s) + endnode.addTrans('',n1) + endnode = n2 + end s.ungettoken + return [startnode, endnode] end - return [startnode,endnode] - end - - def regterm(s) # '(' regexp ')' | token - t = s.gettoken - if t == '(' then - @parno += 1 - @pars.push(@parno) - (n1, n2) = regexp(s) - n1.pars = @pars.dup + + def regfactor(s) # regterm [ '?' | '+' | '*' ] + (startnode, endnode) = regterm(s) t = s.gettoken - if t == ')' then - @pars.pop - n2.pars = @pars.dup - return [n1, n2] + if t =~ /^[\?]$/ then + startnode.addTrans('',endnode) + elsif t =~ /^[\+]$/ then + endnode.addTrans('',startnode) + elsif t =~ /^[\*]$/ then + startnode.addTrans('',endnode) + endnode.addTrans('',startnode) else - puts 'missing )' - exit + s.ungettoken end - else - startnode = Node.new - startnode.pars = @pars.dup - endnode = Node.new - endnode.pars = @pars.dup - startnode.addTrans(t,endnode) - return [startnode, endnode] + return [startnode,endnode] end + + def regterm(s) # '(' regexp ')' | token + t = s.gettoken + if t == '(' then + @parno += 1 + @pars.push(@parno) + (n1, n2) = regexp(s) + n1.pars = @pars.dup + t = s.gettoken + if t == ')' then + @pars.pop + n2.pars = @pars.dup + return [n1, n2] + else + puts 'missing )' + exit + end + else + startnode = Node.new + startnode.pars = @pars.dup + endnode = Node.new + endnode.pars = @pars.dup + startnode.addTrans(t,endnode) + return [startnode, endnode] + end + end end + end +