lib/reckon/app.rb in reckon-0.4.3 vs lib/reckon/app.rb in reckon-0.4.4

- old
+ new

@@ -2,16 +2,17 @@
 require 'pp'
 require 'yaml'
 
 module Reckon
   class App
-    VERSION = "Reckon 0.4.3"
-    attr_accessor :options, :accounts, :tokens, :seen, :csv_parser
+    VERSION = "Reckon 0.4.4"
+    attr_accessor :options, :accounts, :tokens, :seen, :csv_parser, :regexps
 
     def initialize(options = {})
       self.options = options
       self.tokens = {}
+      self.regexps = {}
       self.accounts = {}
       self.seen = {}
       self.options[:currency] ||= '$'
       options[:string] = File.read(options[:file]) unless options[:string]
       @csv_parser = CSVParser.new( options )
@@ -52,26 +53,40 @@
     def learn!
       if options[:account_tokens_file]
         fail "#{options[:account_tokens_file]} doesn't exist!" unless File.exists?(options[:account_tokens_file])
         extract_account_tokens(YAML.load_file(options[:account_tokens_file])).each do |account, tokens|
-          tokens.each { |t| learn_about_account(account, t) }
+          tokens.each { |t| learn_about_account(account, t, true) }
         end
       end
 
       return unless options[:existing_ledger_file]
       fail "#{options[:existing_ledger_file]} doesn't exist!" unless File.exists?(options[:existing_ledger_file])
       ledger_data = File.read(options[:existing_ledger_file])
       learn_from(ledger_data)
     end
 
-    def learn_about_account(account, data)
+    def learn_about_account(account, data, parse_regexps = false)
       accounts[account] ||= 0
-      tokenize(data).each do |token|
-        tokens[token] ||= {}
-        tokens[token][account] ||= 0
-        tokens[token][account] += 1
-        accounts[account] += 1
+      if parse_regexps && data.start_with?('/')
+        # https://github.com/tenderlove/psych/blob/master/lib/psych/visitors/to_ruby.rb
+        match = data.match(/^\/(.*)\/([ix]*)$/m)
+        fail "failed to parse regexp #{data}" unless match
+        options = 0
+        (match[2] || '').split('').each do |option|
+          case option
+          when 'x' then options |= Regexp::EXTENDED
+          when 'i' then options |= Regexp::IGNORECASE
+          end
+        end
+        regexps[Regexp.new(match[1], options)] = account
+      else
+        tokenize(data).each do |token|
+          tokens[token] ||= {}
+          tokens[token][account] ||= 0
+          tokens[token][account] += 1
+          accounts[account] += 1
+        end
       end
     end
 
     def tokenize(str)
       str.downcase.split(/[\s\-]/)
@@ -90,11 +105,12 @@
           end
         else
           seen_anything_new = true
        end
 
-        possible_answers = weighted_account_match( row ).map! { |a| a[:account] }
+        possible_answers = most_specific_regexp_match(row)
+        possible_answers = weighted_account_match( row ).map! { |a| a[:account] } if possible_answers.empty?
 
         ledger = if row[:money] > 0
           if options[:unattended]
             out_of_account = possible_answers.first || options[:default_outof_account] || 'Income:Unknown'
           else
@@ -147,9 +163,18 @@
     end
 
     def output(ledger_line)
       options[:output_file].puts ledger_line
       options[:output_file].flush
+    end
+
+    def most_specific_regexp_match( row )
+      matches = regexps.map { |regexp, account|
+        if match = regexp.match(row[:description])
+          [account, match[0]]
+        end
+      }.compact
+      matches.sort_by! { |account, matched_text| matched_text.length }.map(&:first)
     end
     # Weigh accounts by how well they match the row
     def weighted_account_match( row )
       query_tokens = tokenize(row[:description])
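
The headline change in 0.4.4 is that entries in the account tokens YAML file may now be written as regexp literals such as /pattern/i. learn_about_account compiles these into Ruby Regexp objects (only the 'i' and 'x' flags are recognised) and stores them in the new regexps hash instead of feeding them through the token counter. Below is a minimal standalone sketch of that parsing step, assuming a hypothetical tokens file; the YAML contents and account names are invented for illustration.

# Minimal sketch of the regexp-token parsing added in 0.4.4, outside the App
# class; only the 'i' and 'x' flags are honoured, mirroring the diff above.
require 'yaml'

# Hypothetical account_tokens.yml contents (invented example):
tokens_yaml = <<~YML
  Expenses:Groceries:
    - /whole ?foods/i
    - safeway
YML

regexps = {}       # Regexp => account, like App#regexps
plain_tokens = {}  # account => [token, ...] for the existing token matcher

YAML.load(tokens_yaml).each do |account, tokens|
  tokens.each do |t|
    if t.start_with?('/')
      # Parse "/pattern/flags" the same way the diff does
      match = t.match(/^\/(.*)\/([ix]*)$/m)
      raise "failed to parse regexp #{t}" unless match
      flags = 0
      (match[2] || '').each_char do |option|
        flags |= Regexp::EXTENDED   if option == 'x'
        flags |= Regexp::IGNORECASE if option == 'i'
      end
      regexps[Regexp.new(match[1], flags)] = account
    else
      (plain_tokens[account] ||= []) << t
    end
  end
end

p regexps       # => {/whole ?foods/i=>"Expenses:Groceries"}
p plain_tokens  # => {"Expenses:Groceries"=>["safeway"]}

In the gem itself the loaded YAML is first run through extract_account_tokens before each token reaches learn_about_account; the sketch skips that step and parses the flat form directly.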
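
Matching precedence also changes: walk_backwards now asks most_specific_regexp_match for candidate accounts first, and only falls back to weighted_account_match when no regexp matched at all. The sketch below reproduces the new method outside the App class to show its ordering; the accounts, patterns, and transaction descriptions are made up.

# Sketch of how most_specific_regexp_match orders its results.
def most_specific_regexp_match(regexps, description)
  matches = regexps.map { |regexp, account|
    if (m = regexp.match(description))
      [account, m[0]]
    end
  }.compact
  # Sorts by ascending length of the matched text, as in the 0.4.4 code
  matches.sort_by { |_account, matched_text| matched_text.length }.map(&:first)
end

regexps = {
  /amazon/i       => 'Expenses:Shopping',
  /amazon prime/i => 'Expenses:Subscriptions'
}

p most_specific_regexp_match(regexps, 'AMAZON PRIME MEMBERSHIP')
# => ["Expenses:Shopping", "Expenses:Subscriptions"]
p most_specific_regexp_match(regexps, 'LOCAL GROCER')
# => []   (walk_backwards then falls back to weighted_account_match)

Because the sort is by ascending match length, and unattended mode takes possible_answers.first, the account whose regexp matched the shortest piece of text is the one chosen when several patterns match the same description.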