#!/usr/bin/env ruby

# Command-line script: walks product ids of a remote shop one at a time via
# MagentoMech#scrape_products and stores every hit as a
# RawgentoModels::RemoteProduct belonging to the configured supplier.

require "rawbotz"
require 'magento_remote'
require 'optparse'
# YAML and Logger are used directly below, so require them explicitly instead
# of relying on another library having loaded them.
require 'yaml'
require 'logger'

# Defaults for all command line options.
options = {
  sleep_time: 1,
  start_pid: 0,
  limit: nil,
  check_new: false,
  max_dead: 100,
  conf_file: 'remote-shop.yml'
}

# Renders the default value of option +sym+ for use in help texts.
def options.print_default(sym)
  "(default: #{self[sym]})"
end

program_name = File.basename __FILE__
# Kept as a global because main reads it for its debug banner.
$program_name = program_name

optparse = OptionParser.new do |opts|
  opts.banner = "Usage: #{program_name} [OPTIONS]"
  opts.separator ""
  opts.separator "Scrapes remote shop and 'imports' products from it."
  opts.separator ""
  opts.separator "Will read remote shop conf from config file."
  opts.separator ""

  opts.separator "Configuration file (remote shop info, local db info)"
  opts.on("-c", "--config FILE",
          "file path to YAML config file. #{options.print_default(:conf_file)}") do |c|
    options[:config] = c
  end

  opts.separator ""
  opts.separator "Parsing/scraping options"
  opts.on("-s", "--start-pid PRODUCT_ID", Integer,
          "start at product_id PRODUCT_ID #{options.print_default(:start_pid)}") do |v|
    options[:start_pid] = v
  end
  opts.on("-w", "--wait SECONDS", Float,
          "sleep SECONDS between two requests #{options.print_default(:sleep_time)}") do |v|
    options[:sleep_time] = v
  end
  opts.on("-b", "--batches NUMBER", Integer,
          "try NUMBER batches before assuming no further hits #{options.print_default(:max_dead)}") do |v|
    options[:max_dead] = v
  end
  opts.on("-m", "--[no-]mail", "send mail with result") do |v|
    options[:mail] = v
  end
  opts.on("-l", "--limit LIMIT", Integer,
          "try LIMIT products #{options.print_default(:limit)}") do |v|
    options[:limit] = v
  end
  # Plain flag: it takes no argument, so no argument class is declared.
  opts.on("-n", "--check-new", "check only for new products") do
    options[:check_new] = true
  end

  opts.separator ""
  opts.separator "Output options"
  opts.on("-v", "--verbose", 'print debug output (WARNING: including PASSWORD)') do
    $stdout.sync = true
    options[:verbose] = true
  end

  opts.separator ""
  opts.separator "General"
  opts.on_tail('--version', 'Show version.') do
    puts "#{program_name} #{Rawbotz::VERSION}"
    exit 0
  end
  opts.on_tail('-h', '--help', 'Show this help.') do
    puts opts
    exit 0
  end
end
optparse.parse!

# --check-new derives the start id itself, so an explicit start id conflicts.
if options[:check_new] && options[:start_pid] != 0
  STDERR.puts "Cannot define both check-new and start-pid options"
  exit 1
end

# Returns the highest product_id stored for +supplier+, or nil when no
# remote product of that supplier exists yet.
def last_remote_product_id(supplier)
  newest = RawgentoModels::RemoteProduct.unscoped
                                        .where(supplier: supplier)
                                        .order(product_id: :desc)
                                        .first
  newest ? newest.product_id : nil
end

# Scrapes the remote shop and upserts the products found.
#
# options - Hash of settings as built by the option parser above. Keys read:
#           :config / :conf_file (path of the YAML config), :verbose,
#           :start_pid, :check_new, :sleep_time, :limit, :max_dead, :mail.
#
# The loop stops once :limit is exceeded (when a limit is given) or once
# more than :max_dead consecutive product ids yielded nothing.
def main options
  STDOUT.sync = true
  logger = Logger.new(STDOUT)
  logger.level = options[:verbose] ? Logger::DEBUG : Logger::INFO
  logger.debug "#{$program_name} #{Rawbotz::VERSION}"

  # -c stores into :config while the default lives in :conf_file; fall back
  # to the default so running without -c does not pass nil to YAML.load_file.
  config_path = options[:config] || options[:conf_file]

  mech_config = YAML.load_file(config_path)["remote_shop"]
  logger.debug(mech_config)
  mech = MagentoMech.from_config(mech_config)
  mech.log_to! logger

  RawgentoModels.establish_connection config_path
  logger.debug("Established connection to rawbotz database.")

  supplier_name = mech_config["supplier_name"]
  supplier = RawgentoModels::Supplier.find_or_create_by(name: supplier_name)
  logger.debug("Supplier is #{supplier.name}")

  number_dead = 0
  # Baseline over all remote products, used to report how many were added.
  product_count = RawgentoModels::RemoteProduct.count
  current_pid = options[:start_pid]

  if options[:check_new]
    # Resume at the highest known product_id; start at 0 if none is stored.
    last_product_id = last_remote_product_id(supplier) || 0
    logger.debug "Nr Remote Products: #{RawgentoModels::RemoteProduct.where(supplier: supplier).count}"
    current_pid = last_product_id
    options[:start_pid] = last_product_id
    logger.info "Check for new products (last found product_id was #{last_product_id})"
  end

  loop do
    # The code below treats each result entry as [name, product_id] --
    # NOTE(review): confirm against MagentoMech#scrape_products.
    found = mech.scrape_products(current_pid, 1, options[:sleep_time])
    if !found.empty?
      logger.info "Found #{found[0].inspect}"
      product_db = RawgentoModels::RemoteProduct.find_or_initialize_by(product_id: found[0][1].to_i,
                                                                       supplier: supplier)
      # update! already persists (and raises on failure); no extra save! needed.
      product_db.update!(name: found[0][0], product_id: found[0][1].to_i, supplier: supplier)
      number_dead = 0
    else
      # Remaining attempts before the limit is hit (last id minus current id).
      extra = options[:limit] ? "#{(options[:start_pid] + options[:limit]) - current_pid}" : ""
      extra += " #{options[:max_dead] - number_dead}"
      logger.info "None at #{current_pid} - try #{extra} more"
      number_dead += 1
    end
    current_pid += 1

    if options[:limit] && current_pid > options[:start_pid] + options[:limit]
      logger.info "reached limit"
      break
    end
    if number_dead > options[:max_dead]
      logger.info "reached max dead"
      break
    end
  end

  # Look the supplier's newest product up once and guard against nil: other
  # suppliers may have products while this one has none.
  last_id = last_remote_product_id(supplier)
  if last_id
    logger.info "Last at #{last_id}"
    logger.info "Found #{RawgentoModels::RemoteProduct.count - product_count} remote products"
  else
    logger.warn "No remote products found!"
  end

  if options[:mail]
    logger.debug("Sending mail")
    Rawbotz::mail("rawbotz remote product update",
                  "Found #{RawgentoModels::RemoteProduct.count - product_count} remote products")
  end
end

main options

exit 0