Sha256: 588fdff4ef708d1a450b16a4cb6012b455fd13fe84ee86a0c102848bcc35310b
Contents?: true
Size: 1.11 KB
Versions: 3
Compression:
Stored size: 1.11 KB
Contents
#!/usr/bin/env ruby

# Hold-out evaluation of Reckon::CosineSimilarity against an existing ledger.
#
# Usage: cosine_training_and_test.rb LEDGER_FILE ACCOUNT [SEED]
#
# Ledger entries that involve ACCOUNT are randomly split (roughly 90% / 10%)
# into a training set and a test set. Every account line of each training
# entry is fed to the matcher; each test entry is then looked up and counted
# as "true" when the best match names an account that the entry really
# contains, and "false" otherwise. Passing SEED makes the split reproducible.

require 'pp'
require 'reckon'

ledger_file = ARGV[0]
account = ARGV[1]
abort "usage: #{$PROGRAM_NAME} LEDGER_FILE ACCOUNT [SEED]" unless ledger_file && account
seed = ARGV[2] ? ARGV[2].to_i : Random.new_seed

# Block form so the file handle is closed once parsing is finished.
ledger = File.open(ledger_file) { |file| Reckon::LedgerParser.new.parse(file) }
matcher = Reckon::CosineSimilarity.new({})

# Returns true when any account line of +entry+ is named +account+.
def has_account(account, entry)
  entry[:accounts].any? { |a| a[:name] == account }
end

entries = ledger.entries.select { |e| has_account(account, e) }

# The block is invoked once per entry, in order, so a given seed always
# produces the same train/test split.
rng = Random.new(seed)
train, test = entries.partition { rng.rand < 0.9 }

train.each do |entry|
  entry[:accounts].each do |a|
    matcher.add_document(a[:name], [entry[:desc], a[:amount]].join(' '))
  end
end

# Collect only the misclassified test entries, paired with what the matcher
# returned. The earlier version stored these in an array indexed by the
# entry's position in `entries`, which padded the array with nils and made
# the "true" count wrong; counting from `test.length` avoids that.
failures = test.map do |entry|
  matches = matcher.find_similar("#{entry[:desc]} #{entry[:accounts][0][:amount]}")
  best = matches[0]
  [entry, matches] unless best && has_account(best[:account], entry)
end.compact

# pp failures
puts "using #{seed} as random seed"
puts "true: #{test.length - failures.length} false: #{failures.length}"
puts(failures)
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
reckon-0.11.1 | spec/cosine_training_and_test.rb |
reckon-0.11.0 | spec/cosine_training_and_test.rb |
reckon-0.10.0 | spec/cosine_training_and_test.rb |