Sha256: ce578e55925a02ac2ac16e9639f1d7bc2b5b252b5085b3657e5e7fb22b4ae296
Contents?: true
Size: 1.34 KB
Versions: 7
Compression:
Stored size: 1.34 KB
Contents
module Wukong
  module FlatPack
    # Parses flat (fixed-layout) strings against a "language": an ordered,
    # indexable list of token objects. Each token must respond to +re+ (a
    # regular-expression fragment) and +translate(value)+ (converts the text
    # captured for that token into its final value).
    class Parser
      attr_accessor :re
      attr_accessor :lang

      # @param lang [Array] ordered token objects describing one record
      def initialize(lang)
        @lang = lang
        @re = re_from_language(@lang)
      end

      # Returns true if the supplied string is in the parser's language.
      #
      # @param str [String]
      # @return [Boolean]
      def string_in_lang?(str)
        !(str =~ @re).nil?
      end

      # Creates a regular expression from the supplied language by wrapping
      # each token's +re+ fragment in its own capture group, in order.
      #
      # The pattern is anchored with ^ and $ (line anchors), so a trailing
      # newline after the record is tolerated; file_to_tsv relies on this
      # because each_line yields lines with their terminator attached.
      #
      # NOTE(review): a token whose +re+ itself contains capture groups would
      # shift the group-to-token alignment used by #parse -- confirm tokens
      # only use non-capturing groups.
      #
      # @param lang [Array] ordered token objects
      # @return [Regexp]
      def re_from_language(lang)
        groups = lang.map { |token| "(#{token.re})" }.join
        Regexp.new('^' + groups + '$')
      end

      # Splits +str+ into translated field values, one per token.
      #
      # @param str [String] the record to parse
      # @param trim [Boolean] strip surrounding whitespace from String fields
      # @return [Array, nil] translated fields with every :ignore entry
      #   removed, or nil when +str+ is not in the parser's language
      def parse(str, trim = false)
        match = @re.match(str)
        return nil unless match

        fields = match.captures.each_with_index.map do |val, index|
          token = lang[index].translate(val)
          token.strip! if trim && token.is_a?(String)
          token
        end
        # Tokens signal "drop this field" by translating to :ignore.
        fields - [:ignore]
      end

      # Converts every parseable line of +in_filename+ to a tab-separated
      # line appended to +out_filename+. Lines that are not in the parser's
      # language are skipped. Both files are closed (and the output flushed)
      # before returning, even if an error is raised part-way through.
      #
      # @param in_filename [String] path of the flat file to read
      # @param out_filename [String] path of the TSV file to append to
      # @param trim [Boolean] forwarded to #parse
      def file_to_tsv(in_filename, out_filename, trim = true)
        File.open(in_filename, 'r') do |infile|
          File.open(out_filename, 'a') do |outfile|
            infile.each_line do |line|
              tsv = line_to_tsv(line, trim)
              outfile.write(tsv) if tsv
            end
          end
        end
      end

      # Converts a single record to one newline-terminated TSV line.
      #
      # @param line [String] the record to convert
      # @param trim [Boolean] forwarded to #parse
      # @return [String, nil] the TSV line, or nil when +line+ is not in the
      #   parser's language (mirrors #parse)
      def line_to_tsv(line, trim = true)
        fields = parse(line, trim)
        return nil if fields.nil?

        fields.join("\t") + "\n"
      end
    end
  end
end
Version data entries
7 entries across 7 versions & 2 rubygems