bin/updateAll in wmap-2.6.7 vs bin/updateAll in wmap-2.6.8
- old
+ new
@@ -1,16 +1,43 @@
#!/usr/bin/env ruby
# The up-to-date program to refresh all local cache tables in one shot. Note that it requires an uninterrupted Internet connection to perform the job. It also takes a long time, so patience is needed. Lastly, don't forget to back up the existing 'data' folder before executing this command, because any unexpected interruption may wreak havoc on the existing data files!
-
+#
+#
+# Usage: updateAll [-d <data directory>] [-v] [-h]
require "wmap"
+require "optparse"
+# Program command line options: -d/--data_dir selects the local cache data directory, -v/--[no-]verbose toggles verbose output, -h/--help prints the usage and exits. NOTE(review): :target is initialised below but no option defined in this parser sets it.
+options = {:data_dir => nil, :target => nil, :verbose => false}
+parser = OptionParser.new do|opts|
+ opts.banner = Wmap.banner
+ opts.on('-d', '--data_dir data_dir', 'Web Mapper local cache data directory') do |data_dir|
+ options[:data_dir] = data_dir;
+ end
+ opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
+ options[:verbose] = v;
+ end
+ opts.on('-h', '--help', 'Displays Help') do
+ puts opts
+ exit 0
+ end
+end
+parser.parse!  # parses ARGV in place; the handlers above fill in options
+
puts Wmap.banner
puts "Program to refresh the local data repository."
-Log_dir=File.dirname(__FILE__)+'/../logs/'
+# Prepare the working logs directory: <data_dir>/logs when -d is given, otherwise a 'logs' folder under the installed wmap gem path. NOTE(review): Pathname is used here without a visible require "pathname" - presumably loaded via wmap or RubyGems; confirm.
+if options[:data_dir]
+ # Log under the user-supplied data directory
+ Log_dir = Pathname.new(options[:data_dir]).join('logs')
+else
+ # No data directory given: log under the wmap gem's own install path
+ Log_dir=Pathname.new(Gem.loaded_specs['wmap'].full_gem_path).join('logs')
+end
+Dir.mkdir(Log_dir) unless Dir.exist?(Log_dir)  # NOTE(review): Dir.mkdir creates only the final path component and raises if the parent directory is missing
Wmap.wlog("Execute the command: updateAll","updateAll",Log_dir+"wmap.log")
-abort "Incorrect program argument - no argument needed! Proper Usage: updateAll" unless ARGV.length==0
=begin
puts "You're about to update Wmap data repository. It'll take a long time. And the Internet connection must be un-interrupted during the process. You're also expected to backup the data folder before proceeding. Are you ready? (Yes/No)"
STDOUT.flush
answer=gets.chomp
if answer =~ /yes/i
@@ -19,43 +46,78 @@
abort "You got it. Mission is successfully aborted. "
end
=end
# Update sub-domain table. With -d, the SubDomain instance is first pointed at <data_dir>/sub_domains and its known sub-domains are reloaded from that file; the resulting keys are kept in subs for the brute-force step further down.
sd=Wmap::DomainTracker::SubDomain.instance
+if options[:data_dir]
+ sd.data_dir=options[:data_dir]
+ sd.sub_domains_file = sd.data_dir + "/" + "sub_domains"
+ sd.known_internet_sub_domains = sd.load_domains_from_file(sd.sub_domains_file)
+end
sd.update_from_host_store!
subs=sd.known_internet_sub_domains.keys
sd=nil  # drop the local reference once the keys have been captured
# Update Domain table. With -d, the DomainTracker instance is pointed at <data_dir>/domains before its known domain keys are collected into domains.
dm=Wmap::DomainTracker.instance
+if options[:data_dir]
+ dm.data_dir=options[:data_dir]
+ dm.domains_file=dm.data_dir + "/" + "domains"
+ dm.load_domains_from_file(dm.domains_file)  # NOTE(review): the return value is discarded here, whereas the sub-domain section assigns it to known_internet_sub_domains - confirm this call updates known_internet_domains itself
+end
domains=dm.known_internet_domains.keys
dm=nil
# Brute force the collected sub-domains and domains to detect hosts; the per-name results are flattened into sub_hosts and hosts
-bruter=Wmap::DnsBruter.new
+bruter=Wmap::DnsBruter.new(:verbose => options[:verbose])
+if options[:data_dir]
+ bruter.data_dir=options[:data_dir]
+ bruter.file_hosts = bruter.data_dir + "/" + "hosts"
+end
sub_hosts=bruter.brutes(subs).values.flatten
hosts=bruter.brutes(domains).values.flatten
# Update primary host store. With -d, the PrimaryHost instance is pointed at <data_dir>/prime_hosts and loaded from that file; it is then updated from the site store, refreshed and saved.
ph=Wmap::HostTracker::PrimaryHost.instance
+if options[:data_dir]
+ ph.data_dir=options[:data_dir]
+ ph.hosts_file = ph.data_dir + "/" + "prime_hosts"
+ ph.load_known_hosts_from_file(ph.hosts_file)
+end
ph.update_from_site_store!
ph.refresh_all
ph.save!
ph=nil
# Update host store. With -d, the HostTracker instance is pointed at <data_dir>/hosts and loaded from that file; existing entries are refreshed, then the brute-forced sub_hosts and hosts are added before saving.
h=Wmap::HostTracker.instance
+if options[:data_dir]
+ h.data_dir=options[:data_dir]
+ h.hosts_file=h.data_dir + "/" +"hosts"
+ h.load_known_hosts_from_file(h.hosts_file)
+end
h.refresh_all
h.adds(sub_hosts)
h.adds(hosts)
h.save!
h=nil
# Update site store. With -d, the SiteTracker instance is pointed at <data_dir>/sites and loaded from that file before being refreshed.
st=Wmap::SiteTracker.instance
+if options[:data_dir]
+ st.data_dir=options[:data_dir]
+ st.sites_file=st.data_dir + "/" +"sites"
+ st.load_site_stores_from_file(st.sites_file)
+end
st.refresh_all
+# Re-check the de-activated sites: their keys are added back into the site store in case a site is back on-line again
dt=Wmap::SiteTracker::DeactivatedSite.instance
+if options[:data_dir]
+ dt.data_dir = options[:data_dir]
+ dt.sites_file = dt.data_dir + "/" + "deactivated_sites"
+ dt.known_sites = dt.load_site_stores_from_file(dt.sites_file)
+end
ds=dt.known_sites.keys
-st.adds(ds) #double-check the de-activated sites in case the site is back on-line again
+st.adds(ds)
st.save!
st=nil
dt=nil