# frozen_string_literal: true

# This tool creates the basic structure(s) needed to use a dictionary.
# For the input dictionary structure, please see:
# https://stackoverflow.com/a/18386728/4352306
require "json"
require "csv"
require "pp"

module Eco
  module Lexic
    # Loads the part-of-speech tag table and reads the tab-separated
    # dictionary file whose rows are `inflected_form, base_form, tag`.
    class Dictionary
      # Tab-separated source dictionary (one entry per row).
      DICTIONARY_FILE = File.join(__dir__, 'dictionary', 'dictionary.txt').freeze
      # JSON file holding the part-of-speech tags.
      TAGS_FILE = File.join(__dir__, 'dictionary', 'tags.json').freeze
      # Location reserved for the dictionary once converted to JSON.
      PARSED_DICTIONARY = File.join(__dir__, 'dictionary', 'dictionary.json').freeze

      # @return [Object] the content of TAGS_FILE as decoded by JSON.load
      attr_reader :tags

      # Loads the tags from TAGS_FILE into the `tags` reader.
      #
      # The file is an alphabetical list of part-of-speech tags used in the
      # Penn Treebank Project. Reference:
      # https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
      #
      # @raise [SystemCallError] if TAGS_FILE cannot be opened
      # @raise [JSON::ParserError] if TAGS_FILE is not valid JSON
      def initialize
        # Block form guarantees the file handle is closed after loading.
        @tags = File.open(TAGS_FILE, "r") { |file| JSON.load(file) }
      end

      # Reads a tab-separated dictionary file row by row.
      #
      # Each row is expected to hold three columns, in this order:
      # inflected form, base form and tag. When a block is given, every row
      # is yielded to it; without a block the file is only read through.
      #
      # @param dic_file [String, nil] path of the dictionary file;
      #   DICTIONARY_FILE is used when nil
      # @yieldparam inflected_form [String, nil] first column of the row
      # @yieldparam base_form [String, nil] second column of the row
      # @yieldparam tag [String, nil] third column of the row
      # @return [nil]
      # @raise [SystemCallError] if the dictionary file cannot be opened
      # @raise [CSV::MalformedCSVError] if a row cannot be parsed
      def parse_dictionary(dic_file = nil)
        dic_file ||= DICTIONARY_FILE

        # CSV.foreach takes its options as keywords and closes the file itself.
        CSV.foreach(dic_file, col_sep: "\t") do |inflected_form, base_form, tag|
          yield inflected_form, base_form, tag if block_given?
        end

        nil
      end
    end
  end
end