lib/reckon/app.rb in reckon-0.4.4 vs lib/reckon/app.rb in reckon-0.5.0
- old
+ new
@@ -1,99 +1,97 @@
-#coding: utf-8
+# coding: utf-8
require 'pp'
require 'yaml'
module Reckon
class App
- VERSION = "Reckon 0.4.4"
- attr_accessor :options, :accounts, :tokens, :seen, :csv_parser, :regexps
+ attr_accessor :options, :seen, :csv_parser, :regexps, :matcher
def initialize(options = {})
+ LOGGER.level = Logger::INFO if options[:verbose]
self.options = options
- self.tokens = {}
self.regexps = {}
- self.accounts = {}
self.seen = {}
self.options[:currency] ||= '$'
options[:string] = File.read(options[:file]) unless options[:string]
@csv_parser = CSVParser.new( options )
+ @matcher = CosineSimilarity.new(options)
learn!
end
def interactive_output(str)
return if options[:unattended]
puts str
end
+ def learn!
+ learn_from_account_tokens(options[:account_tokens_file])
+
+ ledger_file = options[:existing_ledger_file]
+ return unless ledger_file
+ fail "#{ledger_file} doesn't exist!" unless File.exists?(ledger_file)
+ learn_from(File.read(ledger_file))
+ end
+
+ def learn_from_account_tokens(filename)
+ return unless filename
+
+ fail "#{filename} doesn't exist!" unless File.exists?(filename)
+
+ extract_account_tokens(YAML.load_file(filename)).each do |account, tokens|
+ tokens.each do |t|
+ if t.start_with?('/')
+ add_regexp(account, t)
+ else
+ @matcher.add_document(account, t)
+ end
+ end
+ end
+ end
+
def learn_from(ledger)
LedgerParser.new(ledger).entries.each do |entry|
entry[:accounts].each do |account|
- learn_about_account( account[:name],
- [entry[:desc], account[:amount]].join(" ") ) unless account[:name] == options[:bank_account]
- seen[entry[:date]] ||= {}
- seen[entry[:date]][@csv_parser.pretty_money(account[:amount])] = true
+ str = [entry[:desc], account[:amount]].join(" ")
+ @matcher.add_document(account[:name], str) unless account[:name] == options[:bank_account]
+ pretty_date = entry[:date].iso8601
+ seen[pretty_date] ||= {}
+ seen[pretty_date][@csv_parser.pretty_money(account[:amount])] = true
end
end
end
- def already_seen?(row)
- seen[row[:pretty_date]] && seen[row[:pretty_date]][row[:pretty_money]]
- end
-
+ # Add tokens from account_tokens_file to accounts
def extract_account_tokens(subtree, account = nil)
if subtree.nil?
puts "Warning: empty #{account} tree"
{}
elsif subtree.is_a?(Array)
{ account => subtree }
else
- at = subtree.map { |k, v| extract_account_tokens(v, [account, k].compact.join(':')) }
- at.inject({}) { |k, v| k = k.merge(v)}
- end
- end
-
- def learn!
- if options[:account_tokens_file]
- fail "#{options[:account_tokens_file]} doesn't exist!" unless File.exists?(options[:account_tokens_file])
- extract_account_tokens(YAML.load_file(options[:account_tokens_file])).each do |account, tokens|
- tokens.each { |t| learn_about_account(account, t, true) }
+ at = subtree.map do |k, v|
+ merged_acct = [account, k].compact.join(':')
+ extract_account_tokens(v, merged_acct)
end
+ at.inject({}) { |memo, e| memo.merge!(e)}
end
- return unless options[:existing_ledger_file]
- fail "#{options[:existing_ledger_file]} doesn't exist!" unless File.exists?(options[:existing_ledger_file])
- ledger_data = File.read(options[:existing_ledger_file])
- learn_from(ledger_data)
end
- def learn_about_account(account, data, parse_regexps = false)
- accounts[account] ||= 0
- if parse_regexps && data.start_with?('/')
- # https://github.com/tenderlove/psych/blob/master/lib/psych/visitors/to_ruby.rb
- match = data.match(/^\/(.*)\/([ix]*)$/m)
- fail "failed to parse regexp #{data}" unless match
- options = 0
- (match[2] || '').split('').each do |option|
- case option
- when 'x' then options |= Regexp::EXTENDED
- when 'i' then options |= Regexp::IGNORECASE
- end
+ def add_regexp(account, regex_str)
+ # https://github.com/tenderlove/psych/blob/master/lib/psych/visitors/to_ruby.rb
+ match = regex_str.match(/^\/(.*)\/([ix]*)$/m)
+ fail "failed to parse regexp #{regex_str}" unless match
+ options = 0
+ (match[2] || '').split('').each do |option|
+ case option
+ when 'x' then options |= Regexp::EXTENDED
+ when 'i' then options |= Regexp::IGNORECASE
end
- regexps[Regexp.new(match[1], options)] = account
- else
- tokenize(data).each do |token|
- tokens[token] ||= {}
- tokens[token][account] ||= 0
- tokens[token][account] += 1
- accounts[account] += 1
- end
end
+ regexps[Regexp.new(match[1], options)] = account
end
- def tokenize(str)
- str.downcase.split(/[\s\-]/)
- end
-
def walk_backwards
seen_anything_new = false
each_row_backwards do |row|
interactive_output Terminal::Table.new(:rows => [ [ row[:pretty_date], row[:pretty_money], row[:description] ] ])
@@ -105,12 +103,11 @@
end
else
seen_anything_new = true
end
- possible_answers = most_specific_regexp_match(row)
- possible_answers = weighted_account_match( row ).map! { |a| a[:account] } if possible_answers.empty?
+ possible_answers = suggest(row)
ledger = if row[:money] > 0
if options[:unattended]
out_of_account = possible_answers.first || options[:default_outof_account] || 'Income:Unknown'
else
@@ -154,110 +151,82 @@
learn_from(ledger) unless options[:account_tokens_file]
output(ledger)
end
end
- def finish
- options[:output_file].close unless options[:output_file] == STDOUT
- interactive_output "Exiting."
- exit
+ def each_row_backwards
+ rows = []
+ (0...@csv_parser.columns.first.length).to_a.each do |index|
+ rows << { :date => @csv_parser.date_for(index),
+ :pretty_date => @csv_parser.pretty_date_for(index),
+ :pretty_money => @csv_parser.pretty_money_for(index),
+ :pretty_money_negated => @csv_parser.pretty_money_for(index, :negate),
+ :money => @csv_parser.money_for(index),
+ :description => @csv_parser.description_for(index) }
+ end
+ rows.sort { |a, b| a[:date] <=> b[:date] }.each do |row|
+ yield row
+ end
end
- def output(ledger_line)
- options[:output_file].puts ledger_line
- options[:output_file].flush
- end
-
def most_specific_regexp_match( row )
matches = regexps.map { |regexp, account|
if match = regexp.match(row[:description])
[account, match[0]]
end
}.compact
matches.sort_by! { |account, matched_text| matched_text.length }.map(&:first)
end
- # Weigh accounts by how well they match the row
- def weighted_account_match( row )
- query_tokens = tokenize(row[:description])
-
- search_vector = []
- account_vectors = {}
-
- query_tokens.each do |token|
- idf = Math.log((accounts.keys.length + 1) / ((tokens[token] || {}).keys.length.to_f + 1))
- tf = 1.0 / query_tokens.length.to_f
- search_vector << tf*idf
-
- accounts.each do |account, total_terms|
- tf = (tokens[token] && tokens[token][account]) ? tokens[token][account] / total_terms.to_f : 0
- account_vectors[account] ||= []
- account_vectors[account] << tf*idf
- end
- end
-
- # Should I normalize the vectors? Probably unnecessary due to tf-idf and short documents.
-
- account_vectors = account_vectors.to_a.map do |account, account_vector|
- { :cosine => (0...account_vector.length).to_a.inject(0) { |m, i| m + search_vector[i] * account_vector[i] },
- :account => account }
- end
- account_vectors.sort! {|a, b| b[:cosine] <=> a[:cosine] }
-
- # Return empty set if no accounts matched so that we can fallback to the defaults in the unattended mode
- if options[:unattended]
- if account_vectors.first && account_vectors.first[:account]
- account_vectors = [] if account_vectors.first[:cosine] == 0
- end
- end
-
- return account_vectors
+ def suggest(row)
+ most_specific_regexp_match(row) +
+ @matcher.find_similar(row[:description]).map { |n| n[:account] }
end
def ledger_format(row, line1, line2)
out = "#{row[:pretty_date]}\t#{row[:description]}\n"
out += "\t#{line1.first}\t\t\t\t\t#{line1.last}\n"
out += "\t#{line2.first}\t\t\t\t\t#{line2.last}\n\n"
out
end
+ def output(ledger_line)
+ options[:output_file].puts ledger_line
+ options[:output_file].flush
+ end
+
+ def already_seen?(row)
+ seen[row[:pretty_date]] && seen[row[:pretty_date]][row[:pretty_money]]
+ end
+
+ def finish
+ options[:output_file].close unless options[:output_file] == STDOUT
+ interactive_output "Exiting."
+ exit
+ end
+
def output_table
output = Terminal::Table.new do |t|
t.headings = 'Date', 'Amount', 'Description'
each_row_backwards do |row|
t << [ row[:pretty_date], row[:pretty_money], row[:description] ]
end
end
interactive_output output
end
- def each_row_backwards
- rows = []
- (0...@csv_parser.columns.first.length).to_a.each do |index|
- rows << { :date => @csv_parser.date_for(index),
- :pretty_date => @csv_parser.pretty_date_for(index),
- :pretty_money => @csv_parser.pretty_money_for(index),
- :pretty_money_negated => @csv_parser.pretty_money_for(index, :negate),
- :money => @csv_parser.money_for(index),
- :description => @csv_parser.description_for(index) }
- end
- rows.sort { |a, b| a[:date] <=> b[:date] }.each do |row|
- yield row
- end
- end
-
def self.parse_opts(args = ARGV)
options = { :output_file => STDOUT }
parser = OptionParser.new do |opts|
opts.banner = "Usage: Reckon.rb [options]"
opts.separator ""
opts.on("-f", "--file FILE", "The CSV file to parse") do |file|
options[:file] = file
end
- opts.on("-a", "--account name", "The Ledger Account this file is for") do |a|
+ opts.on("-a", "--account NAME", "The Ledger Account this file is for") do |a|
options[:bank_account] = a
end
opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
options[:verbose] = v
@@ -281,10 +250,18 @@
opts.on("", "--ignore-columns 1,2,5", "Columns to ignore in the CSV file - the first column is column 1") do |ignore|
options[:ignore_columns] = ignore.split(",").map { |i| i.to_i }
end
+ opts.on("", "--money-column 2", Integer, "Specify the money column instead of letting Reckon guess - the first column is column 1") do |column_number|
+ options[:money_column] = column_number
+ end
+
+ opts.on("", "--date-column 3", Integer, "Specify the date column instead of letting Reckon guess - the first column is column 1") do |column_number|
+ options[:date_column] = column_number
+ end
+
opts.on("", "--contains-header [N]", "The first row of the CSV is a header and should be skipped. Optionally add the number of rows to skip.") do |contains_header|
options[:contains_header] = 1
options[:contains_header] = contains_header.to_i if contains_header
end
@@ -314,15 +291,15 @@
opts.on("-t", "--account-tokens FILE", "YAML file with manually-assigned tokens for each account (see README)") do |a|
options[:account_tokens_file] = a
end
- opts.on("", "--default-into-account name", "Default into account") do |a|
+ opts.on("", "--default-into-account NAME", "Default into account") do |a|
options[:default_into_account] = a
end
- opts.on("", "--default-outof-account name", "Default 'out of' account") do |a|
+ opts.on("", "--default-outof-account NAME", "Default 'out of' account") do |a|
options[:default_outof_account] = a
end
opts.on("", "--suffixed", "If --currency should be used as a suffix. Defaults to false.") do |e|
options[:suffixed] = e
@@ -349,10 +326,9 @@
exit
end
end
unless options[:bank_account]
-
fail "Please specify --account for the unattended mode" if options[:unattended]
options[:bank_account] = ask("What is the account name of this bank account in Ledger? ") do |q|
q.readline = true
q.validate = /^.{2,}$/