Sha256: ce578e55925a02ac2ac16e9639f1d7bc2b5b252b5085b3657e5e7fb22b4ae296

Contents?: true

Size: 1.34 KB

Versions: 7

Compression:

Stored size: 1.34 KB

Contents

module Wukong
  module FlatPack
    # Parses fixed-format ("flat pack") strings against a language, i.e. an
    # ordered, indexable list of token objects. Each token must respond to:
    #
    #   re             - a regexp source fragment describing the field
    #   translate(val) - converts the captured text into the field's value
    #
    # A token whose translate returns :ignore is dropped from parse results.
    class Parser
      attr_accessor :re
      attr_accessor :lang

      # lang - ordered list of token objects (see the class comment).
      # The matching regexp is built once here; assigning a new lang
      # afterwards does not rebuild it.
      def initialize(lang)
        @lang = lang
        @re = re_from_language(@lang)
      end

      # Returns true if the supplied string is in the parser's language,
      # false otherwise.
      def string_in_lang?(str)
        !(str =~ @re).nil?
      end

      # Creates a regular expression from the supplied language by wrapping
      # each token's pattern in its own capture group, in order.
      #
      # NOTE: the pattern is anchored with ^ and $, which in Ruby match at
      # line boundaries. This is what lets a line that still carries its
      # trailing newline match (as file_to_tsv relies on).
      # NOTE(review): capture positions are mapped 1:1 onto tokens in #parse,
      # which assumes token patterns add no capturing groups of their own.
      def re_from_language(lang)
        groups = lang.map { |token| "(#{token.re})" }
        Regexp.new("^#{groups.join}$")
      end

      # Splits str into its fields and translates each one with the matching
      # token.
      #
      # str  - the string to parse
      # trim - when true, String fields are stripped of surrounding whitespace
      #
      # Returns an Array of translated fields with every :ignore entry
      # removed, or nil if str is not in the parser's language.
      def parse(str, trim = false)
        return nil unless string_in_lang?(str)

        fields = str.match(@re)[1..-1].each_with_index.map do |val, index|
          token = lang[index].translate(val)
          # Non-mutating strip: the translated value may be frozen or shared.
          trim && token.is_a?(String) ? token.strip : token
        end
        fields - [:ignore]
      end

      # Converts every line of in_filename to TSV and appends the result to
      # out_filename.
      #
      # Both files are closed when the conversion finishes or fails part-way.
      # Raises ArgumentError (from line_to_tsv) on the first line that is not
      # in the parser's language. Returns nil.
      def file_to_tsv(in_filename, out_filename, trim = true)
        File.open(in_filename, 'r') do |infile|
          File.open(out_filename, 'a') do |outfile|
            infile.each_line do |line|
              outfile.write(line_to_tsv(line, trim))
            end
          end
        end
        nil
      end

      # Converts a single line into a tab-separated, newline-terminated
      # string of its parsed fields.
      #
      # Raises ArgumentError if the line is not in the parser's language.
      def line_to_tsv(line, trim = true)
        fields = parse(line, trim)
        if fields.nil?
          raise ArgumentError, "line is not in the parser's language: #{line.inspect}"
        end

        fields.join("\t") + "\n"
      end
    end
  end
end

Version data entries

7 entries across 7 versions & 2 rubygems

Version Path
ul-wukong-4.1.1 lib/wukong/model/flatpack_parser/parser.rb
ul-wukong-4.1.0 lib/wukong/model/flatpack_parser/parser.rb
wukong-4.0.0 lib/wukong/model/flatpack_parser/parser.rb
wukong-3.0.1 lib/wukong/model/flatpack_parser/parser.rb
wukong-3.0.0 lib/wukong/model/flatpack_parser/parser.rb
wukong-3.0.0.pre3 lib/wukong/model/flatpack_parser/parser.rb
wukong-3.0.0.pre2 lib/wukong/model/flatpack_parser/parser.rb