Sha256: 7549ee663d221554a8e7c82cfc81059fa590e9fc2bdb7f1b8e9355990653374b
Contents?: true
Size: 1.07 KB
Versions: 4
Compression:
Stored size: 1.07 KB
Contents
#!/usr/bin/env ruby

# Train/test harness for Reckon's cosine-similarity account matcher.
#
# Usage: cosine_training_and_test.rb LEDGER_FILE ACCOUNT [SEED]
#
# Selects every ledger entry that touches ACCOUNT, randomly assigns each one
# to a training set (probability 0.9) or a test set, feeds the training
# entries to the matcher, and then reports for how many test entries the top
# suggestion names an account that really appears on the entry.
#
# SEED makes the split reproducible; when omitted a fresh seed is generated
# and printed at the end so the run can be repeated.

require 'pp'
require 'reckon'

ledger_file = ARGV[0]
account = ARGV[1]
abort "usage: #{$PROGRAM_NAME} LEDGER_FILE ACCOUNT [SEED]" unless ledger_file && account

seed = ARGV[2] ? ARGV[2].to_i : Random.new_seed

ledger = Reckon::LedgerParser.new(File.read(ledger_file))
matcher = Reckon::CosineSimilarity.new({})

# Indexes into `entries`, partitioned into the two sample sets.
train = []
test = []

# Returns true when one of the entry's postings is for +account+.
#
# @param account [String] account name to look for
# @param entry [Hash] ledger entry; its :accounts value is a list of hashes
#   that each carry a :name
# @return [Boolean]
def has_account(account, entry)
  entry[:accounts].any? { |a| a[:name] == account }
end

entries = ledger.entries.select { |e| has_account(account, e) }

# Seeded generator so that the same seed always yields the same split.
r = Random.new(seed)
entries.length.times do |i|
  r.rand < 0.9 ? train << i : test << i
end

# Train: register one document per posting, made of the entry description
# followed by that posting's amount, under the posting's account name.
train.each do |i|
  entry = entries[i]
  entry[:accounts].each do |a|
    matcher.add_document(
      a[:name],
      [entry[:desc], a[:amount]].join(" ")
    )
  end
end

# One slot per test sample: nil means the top suggestion was correct,
# otherwise the slot holds [entry, matches] for inspection.
result = [nil] * test.length
test.each_with_index do |i, slot|
  entry = entries[i]
  matches = matcher.find_similar(
    entry[:desc] + " " + entry[:accounts][0][:amount].to_s
  )

  # A miss is either no suggestion at all, or a top suggestion whose account
  # is not on the entry.
  next unless !matches[0] || !has_account(matches[0][:account], entry)

  # Store by position within `test`, not by the entry index `i`. Writing
  # result[i] could land past the end of the array; Ruby then pads the gap
  # with nil, which inflated the "true" count reported below.
  result[slot] = [entry, matches]
end

# pp result.compact
puts "using #{seed} as random seed"
puts "true: #{result.count(nil)} false: #{result.count { |v| !v.nil? }}"
Version data entries
4 entries across 4 versions & 1 rubygems
Version | Path |
---|---|
reckon-0.9.0 | spec/cosine_training_and_test.rb |
reckon-0.8.1 | spec/cosine_training_and_test.rb |
reckon-0.8.0 | spec/cosine_training_and_test.rb |
reckon-0.7.2 | spec/cosine_training_and_test.rb |