Sha256: 8eab7784bb7ac65c4e98547a60401ae3e2f9d80e98ffae4be1bf951a79338b53

Contents?: true

Size: 1.6 KB

Versions: 4

Compression:

Stored size: 1.6 KB

Contents

class ReportCommand
  def self.start(opts)

    if opts[:output]
      options = opts.to_hash.delete_if { |k, v| v.nil?}
      options[:quiet] = !opts[:verbose]

      if options.has_key?(:seed_url_file)
        filename = options.delete(:seed_url_file)
        options[:seed_urls] = []
        File.open(filename, "r") do |f|
          f.each_line do |line|
            options[:seed_urls] << line
          end
        end
      end

      @crawler = CobwebCrawler.new({:cache_type => :full, :raise_exceptions => true}.merge(options))

      columns = nil

      CSV.open(options[:output], "wb", :force_quotes => true) do |csv|

        statistics = @crawler.crawl(options[:url]) do |page|
          puts "Reporting on #{page[:url]} [#{page[:status_code]}]"
          @doc = page[:body]
          page["link_rel"] = scope.link_tag_with_rel("canonical")["href"]
          page["title"] = scope.head_tag.title_tag.contents
          page["description"] = scope.meta_tag_with_name("description")["content"]
          page["keywords"] = scope.meta_tag_with_name("keywords")["content"]
          page["img tag count"] = scope.img_tags.count
          page["scripts in body"] = scope.body_tag.script_tags.count
          page["img without alt count"] = scope.img_tags.select{|node| node[:alt].nil? || node[:alt].strip().empty?}.count
          page["img alt"] = scope.img_tags_with_alt.map{|node| node[:alt]}.uniq

          if !columns
            columns = page.keys.reject{|k| k==:body || k==:links}
            csv << columns.map{|k| k.to_s}
          end
          csv << columns.map{|k| page[k]}
        end
      end
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
cobweb-1.2.1 lib/report_command.rb
cobweb-1.2.0 lib/report_command.rb
cobweb-1.1.0 lib/report_command.rb
cobweb-1.0.29 lib/report_command.rb