#!/usr/bin/env ruby # -*- coding: utf-8 -*- #========================== # string_parser.rb #========================== # # Parses a phrase into leafs and nodes and store the result in an element list # (see element_list.rb) # # This file is part of RSyntaxTree, which is a ruby port of Andre Eisenbach's # excellent program phpSyntaxTree. # # Copyright (c) 2007-2009 Yoichiro Hasebe # Copyright (c) 2003-2004 Andre Eisenbach # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA require 'elementlist' require 'element' def escape_high_ascii(string) html = "" string.length.times do |i| ch = string[i] if(ch < 127) html += ch.chr else html += sprintf("&#%d;", ch) end end html end class StringParser attr_accessor :data, :elist, :pos, :id, :level, :tncnt def initialize(str) # Clean up the data a little to make processing easier string = str.gsub(/\t/, "") rescue "" string.gsub!(/\s+/, " ") string.gsub!(/\] \[/, "][") string.gsub!(/ \[/, "[") @data = string # Store it for later... @elist = ElementList.new # Initialize internal element list @pos = 0 # Position in the sentence @id = 1 # ID for the next element @level = 0 # Level in the diagram @tncnt = Hash.new # Node type counts end # caution: quick and dirty solution def valid? if(@data.length < 1) return false end if /\A\s*\[.+ .+\]\s*\z/ !~ @data return false end text = @data.strip text_r = text.split(//) open_br, close_br = [], [] escape = false text_r.each do |chr| if chr == "\\" escape = true elsif chr == '[' && !escape open_br.push(chr) elsif chr == ']' && !escape close_br.push(chr) if open_br.length < close_br.length break end elsif escape escape = false end end return false unless open_br.length == close_br.length make_tree(0) return false if @tncnt.empty? @tncnt.each do |key, value| return false if key == "" end return true end def parse make_tree(0); end def get_elementlist @elist; end def auto_subscript elements = @elist.get_elements tmpcnt = Hash.new elements.each do |element| if(element.type == ETYPE_NODE) count = 1 content = element.content if @tncnt[content] count = @tncnt[content] end if(count > 1) if tmpcnt[content] tmpcnt[content] += 1 else tmpcnt[content] = 1 end element.content += ("_" + tmpcnt[content].to_s) end end end @tncnt end def count_node(name) name = name.strip if @tncnt[name] @tncnt[name] += 1 else @tncnt[name] = 1 end end def get_next_token data = @data.split(//) gottoken = false token = "" i = 0 if((@pos + 1) >= data.length) return "" end escape = false while(((@pos + i) < data.length) && !gottoken) ch = data[@pos + i]; case ch when "[" if escape token += ch escape = false else if(i > 0) gottoken = true else token += ch end end when "]" if escape token += ch escape = false else if(i == 0 ) token += ch end gottoken = true end when "\\" escape = true when /[\n\r]/ gottoken = false # same as do nothing else token += ch escape = false if escape end i += 1 end if(i > 1) @pos += (i - 1) else @pos += 1 end return token end def make_tree(parent) token = get_next_token.strip parts = Array.new while(token != "" && token != "]" ) token_r = token.split(//) case token_r[0] when "[" tl = token_r.length token_r = token_r[1, tl - 1] spaceat = token_r.index(" ") newparent = -1 if spaceat parts[0] = token_r[0, spaceat].join tl =token_r.length parts[1] = token_r[spaceat, tl - spaceat].join element = Element.new(@id, parent, parts[0], @level) @id += 1 @elist.add(element) newparent = element.id count_node(parts[0]) element = Element.new(@id, @id - 1, parts[1], @level + 1 ) @id += 1 @elist.add(element) else element = Element.new(@id, parent, token_r.join, @level) @id += 1 newparent = element.id @elist.add(element) count_node(token_r.join) end @level += 1 make_tree(newparent) else if token.strip != "" element = Element.new(@id, parent, token, @level) @id += 1 @elist.add(element) count_node(token) end end token = get_next_token end @level -= 1 end end