bin/sup-sync in sup-0.0.8 vs bin/sup-sync in sup-0.1

- old
+ new

@@ -41,12 +41,12 @@ Usage: sup-sync [options] <source>* where <source>* is zero or more source URIs. If no sources are given, -sync from all usual sources. All supported source URI schemes can -be seen by running "sup-add --help". +sync from all usual sources. Supported source URI schemes can be seen +by running "sup-add --help". Options controlling WHICH messages sup-sync operates on: EOS opt :new, "Operate on new messages only. Don't scan over the entire source. (Default.)", :short => :none opt :changed, "Scan over the entire source for messages that have been deleted, altered, or moved from another source. (In the case of mbox sources, this includes all messages AFTER an altered message.)" @@ -87,11 +87,10 @@ target = [:new, :changed, :all, :restored].find { |x| opts[x] } || :new op = [:asis, :restore, :discard].find { |x| opts[x] } || :asis Redwood::start index = Redwood::Index.new -index.load restored_state = if opts[:restore] dump = {} $stderr.puts "Loading state dump from #{opts[:restore]}..." @@ -104,28 +103,30 @@ dump else {} end -sources = ARGV.map do |uri| - uri = "mbox://#{uri}" unless uri =~ %r!://! - index.source_for uri or Trollop::die "Unknown source: #{uri}. Did you add it with sup-add first?" -end +seen = {} +index.lock_or_die +begin + index.load -sources = index.usual_sources if sources.empty? -sources = index.sources if opts[:all_sources] - -unless target == :new - if opts[:start_at] - sources.each { |s| s.seek_to! opts[:start_at] } - else - sources.each { |s| s.reset! } + sources = ARGV.map do |uri| + index.source_for uri or Trollop::die "Unknown source: #{uri}. Did you add it with sup-add first?" end -end + + sources = index.usual_sources if sources.empty? + sources = index.sources if opts[:all_sources] -seen = {} -begin + unless target == :new + if opts[:start_at] + sources.each { |s| s.seek_to! opts[:start_at] } + else + sources.each { |s| s.reset! } + end + end + sources.each do |source| $stderr.puts "Scanning #{source}..." num_added = num_updated = num_scanned = num_restored = 0 last_info_time = start_time = Time.now @@ -146,11 +147,11 @@ nil end ## skip if we're operating on restored messages, and this one ## ain't. - next if target == :restored && (!restored_state[m.id] || restored_state[m.id].sort_by { |s| s.to_s } == index_state.sort_by { |s| s.to_s }) + next if target == :restored && (!restored_state[m.id] || (index_state && restored_state[m.id].sort_by { |s| s.to_s } == index_state.sort_by { |s| s.to_s })) ## m.labels is the default source labels. tweak these according ## to default source state modification flags. m.labels -= [:inbox] if opts[:archive] m.labels -= [:unread] if opts[:read] @@ -175,11 +176,11 @@ if Time.now - last_info_time > 60 last_info_time = Time.now elapsed = last_info_time - start_time pctdone = source.respond_to?(:pct_done) ? source.pct_done : 100.0 * (source.cur_offset.to_f - source.start_offset).to_f / (source.end_offset - source.start_offset).to_f remaining = (100.0 - pctdone) * (elapsed.to_f / pctdone) - $stderr.puts "## #{num_added + num_updated} (#{pctdone}% done) read; #{elapsed.to_time_s} elapsed; est. #{remaining.to_time_s} remaining (for this source)" + $stderr.puts "## #{num_scanned} (#{pctdone}%) read; #{elapsed.to_time_s} elapsed; #{remaining.to_time_s} remaining" end if index_state.nil? puts "Adding message #{source}##{offset} with state {#{m.labels * ', '}}" if opts[:verbose] num_added += 1 @@ -191,45 +192,50 @@ opts[:dry_run] ? nil : m end $stderr.puts "Scanned #{num_scanned}, added #{num_added}, updated #{num_updated} messages from #{source}." $stderr.puts "Restored state on #{num_restored} (#{100.0 * num_restored / num_scanned}%) messages." if num_restored > 0 end + + ## delete any messages in the index that claim they're from one of + ## these sources, but that we didn't see. + ## + ## kinda crappy code here, because we delve directly into the Ferret + ## API. + ## + ## TODO: move this to Index, i suppose. + + + if target == :all || target == :changed + $stderr.puts "Deleting missing messages from the index..." + num_del, num_scanned = 0, 0 + sources.each do |source| + raise "no source id for #{source}" unless source.id + q = "+source_id:#{source.id}" + q += " +source_info: >= #{opts[:start_at]}" if opts[:start_at] + index.index.search_each(q, :limit => :all) do |docid, score| + num_scanned += 1 + mid = index.index[docid][:message_id] + unless seen[mid] + puts "Deleting #{mid}" if opts[:verbose] + index.index.delete docid unless opts[:dry_run] + num_del += 1 + end + end + end + $stderr.puts "Deleted #{num_del} / #{num_scanned} messages" + end + + if opts[:optimize] + $stderr.puts "Optimizing index..." + optt = time { index.index.optimize unless opts[:dry_run] } + $stderr.puts "Optimized index of size #{index.size} in #{optt}s." + end +rescue Redwood::FatalSourceError => e + $stderr.puts "Sorry, I couldn't communicate with a source: #{e.message}" rescue Exception => e File.open("sup-exception-log.txt", "w") { |f| f.puts e.backtrace } raise ensure index.save Redwood::finish -end - -## delete any messages in the index that claim they're from one of -## these sources, but that we didn't see. -## -## kinda crappy code here, because we delve directly into the Ferret -## API. -## -## TODO: move this to Index, i suppose. -if target == :all || target == :changed - $stderr.puts "Deleting missing messages from the index..." - num_del, num_scanned = 0, 0 - sources.each do |source| - raise "no source id for #{source}" unless source.id - q = "+source_id:#{source.id}" - q += " +source_info: >= #{opts[:start_at]}" if opts[:start_at] - index.index.search_each(q, :limit => :all) do |docid, score| - num_scanned += 1 - mid = index.index[docid][:message_id] - unless seen[mid] - puts "Deleting #{mid}" if opts[:verbose] - index.index.delete docid unless opts[:dry_run] - num_del += 1 - end - end - end - $stderr.puts "Deleted #{num_del} / #{num_scanned} messages" -end - -if opts[:optimize] - $stderr.puts "Optimizing index..." - optt = time { index.index.optimize unless opts[:dry_run] } - $stderr.puts "Optimized index of size #{index.size} in #{optt}s." + index.unlock end