bin/sup-import in sup-0.0.1 vs bin/sup-import in sup-0.0.2

- old
+ new

@@ -1,6 +1,6 @@ -#!/bin/env ruby +#!/usr/bin/env ruby require "sup" class Float def to_s; sprintf '%.2f', self; end @@ -25,11 +25,11 @@ source list. Usage: sup-import [options] <source>* where <source>* is zero or more source descriptions (e.g., mbox -filenames on disk). +filenames on disk, or imap/imaps URIs). If the sources listed are not already in the Sup source list, they will be added to it, as parameterized by the following options: --archive: messages from these sources will not appear in the inbox --unusual: these sources will not be polled when the flag --the-usual @@ -52,11 +52,10 @@ ones that have changed. You probably won't need this unless William changes the index format. --optimize: optimize the index after adding any new messages. --help: don't do anything, just show this message. EOS -#' stupid ruby-mode exit end educate_user if ARGV.member? '--help' @@ -66,66 +65,93 @@ force_read = ARGV.delete "--force-read" the_usual = ARGV.delete "--the-usual" rebuild = ARGV.delete "--rebuild" force_rebuild = ARGV.delete "--force-rebuild" optimize = ARGV.delete "--optimize" +start_at = # ok really need to use optparse or something now + if(i = ARGV.index("--start-at")) + raise "start-at requires a numeric argument: #{ARGV[i + 1].inspect}" unless ARGV.length > (i + 1) && ARGV[i + 1] =~ /\d/ + ARGV.delete_at i + ARGV.delete_at(i).to_i # whoa! + end if(o = ARGV.find { |x| x =~ /^--/ }) $stderr.puts "error: unknown option #{o}" educate_user end puts "loading index..." index = Redwood::Index.new index.load -pre_nm = index.size puts "loaded index of #{index.size} messages" sources = ARGV.map do |fn| + fn = "mbox://#{fn}" unless fn =~ %r!://! source = index.source_for fn unless source - source = Redwood::MBox::Loader.new(fn, 0, !unusual, !!archive) + source = + case fn + when %r!^imaps?://! + print "Username for #{fn}: " + username = $stdin.gets.chomp + print "Password for #{fn} (warning: cleartext): " + password = $stdin.gets.chomp + Redwood::IMAP.new(fn, username, password, nil, !unusual, !!archive) + else + Redwood::MBox::Loader.new(fn, nil, !unusual, !!archive) + end index.add_source source end source end + sources = (sources + index.usual_sources).uniq if the_usual -sources.each { |s| s.reset! } if rebuild || force_rebuild +if rebuild || force_rebuild + if start_at + sources.each { |s| s.seek_to! start_at } + else + sources.each { |s| s.reset! } + end +end found = {} start = Time.now begin sources.each do |source| + if source.broken? + puts "error loading messages from #{source}: #{source.broken_msg}" + next + end next if source.done? puts "loading from #{source}... " num = 0 start_offset = nil source.each do |offset, labels| start_offset ||= offset labels -= [:inbox] if force_archive labels -= [:unread] if force_read begin - m = Redwood::Message.new source, offset, labels + m = Redwood::Message.new :source => source, :source_info => offset, :labels => labels if found[m.id] puts "skipping duplicate message #{m.id}" next else found[m.id] = true end - - m.remove_label :unread if m.mbox_status == "RO" unless force_read + m.remove_label :unread if m.status == "RO" unless force_read + puts "# message at #{offset}, labels: #{labels * ', '}" unless rebuild || force_rebuild if (rebuild || force_rebuild) && (docid, entry = index.load_entry_for_id(m.id)) && entry if force_rebuild || entry[:source_info].to_i != offset puts "replacing message #{m.id} labels #{entry[:label].inspect} (offset #{entry[:source_info]} => #{offset})" m.labels = entry[:label].split.map { |l| l.intern } num += 1 if index.update_message m, source, offset end else num += 1 if index.add_message m end - rescue Redwood::MessageFormatError => e + rescue Redwood::MessageFormatError, Redwood::SourceError => e $stderr.puts "ignoring erroneous message at #{source}##{offset}: #{e.message}" end if num % 1000 == 0 && num > 0 elapsed = Time.now - start pctdone = (offset.to_f - start_offset) / (source.total.to_f - start_offset) @@ -139,21 +165,25 @@ index.save end if rebuild || force_rebuild puts "deleting missing messages from the index..." - numdel = 0 + numdel = num = 0 sources.each do |source| raise "no source id for #{source}" unless source.id - index.index.search_each("source_id:#{source.id}", :limit => :all) do |docid, score| + q = "+source_id:#{source.id}" + q += " +source_info: >= #{start_at}" if start_at + #p q + num += index.index.search_each(q, :limit => :all) do |docid, score| mid = index.index[docid][:message_id] next if found[mid] puts "deleting #{mid}" index.index.delete docid numdel += 1 end + #p num end - puts "deleted #{numdel} messages" + puts "deleted #{numdel} / #{num} messages" end if optimize puts "optimizing index..." optt = time { index.index.optimize }