Sha256: d53d4f321e7bc6bd433db4b1ddd35e3c96e1f8b82b868e01e4d451b96d607a16
Contents?: true
Size: 1.14 KB
Versions: 16
Compression:
Stored size: 1.14 KB
Contents
# this tool creates the basic structure(s) to use a dictionary
# for input dictionary structure, please see: https://stackoverflow.com/a/18386728/4352306
require "json"
require "csv"
require "pp"

module Eco
  module Lexic
    # Loads the part-of-speech tag list and parses the tab-separated
    # dictionary file that ships next to this source file.
    class Dictionary
      DICTIONARY_FILE   = File.join(__dir__, 'dictionary', 'dictionary.txt')
      TAGS_FILE         = File.join(__dir__, 'dictionary', 'tags.json')
      PARSED_DICTIONARY = File.join(__dir__, 'dictionary', 'dictionary.json')

      # @return [Object] the parsed content of TAGS_FILE
      attr_reader :tags

      # Reads TAGS_FILE and exposes its parsed JSON content through #tags.
      #
      # alphabetical list of part-of-speech tags used in the Penn Treebank Project
      # reference: https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
      def initialize()
        # Block form closes the file handle; the result is stored in the
        # instance variable so that the `tags` reader actually returns it.
        @tags = File.open(TAGS_FILE, "r") { |file| JSON.load(file) }
      end

      # Parses a tab-separated dictionary file whose rows are read as
      # (inflected_form, base_form, tag).
      #
      # @param dic_file [String, nil] path of the file to parse; falls back to
      #   DICTIONARY_FILE when nil.
      # @yield [inflected_form, base_form, tag] each row, when a block is given.
      # @return [Array<Array>] one [inflected_form, base_form, tag] triple per row;
      #   fields missing from a short row are nil.
      # @raise [Errno::ENOENT] when the file does not exist.
      # @raise [CSV::MalformedCSVError] when a row cannot be parsed.
      def parse_dictionary(dic_file = nil)
        dic_file ||= DICTIONARY_FILE
        entries = []

        # Options are passed as keywords (a positional Hash raises ArgumentError
        # on Ruby 3), and the block form closes the underlying file.
        # NOTE(review): the default CSV quote character is `"`; entries that
        # contain a literal double quote may fail to parse -- confirm against
        # the real dictionary data.
        CSV.open(dic_file, "r", col_sep: "\t") do |data|
          data.each do |row|
            inflected_form, base_form, tag = row
            entry = [inflected_form, base_form, tag]
            yield(*entry) if block_given?
            entries << entry
          end
        end

        entries
      end
    end
  end
end
Version data entries
16 entries across 16 versions & 1 rubygems