Sha256: c9fb0ce415506784c6fa10c5ac8f8b79b66bf241998626877405dc7d52b99478

Contents?: true

Size: 1.91 KB

Versions: 2

Compression:

Stored size: 1.91 KB

Contents

#!/usr/bin/env ruby

# Locate the lib/ directory that sits next to this script's directory and put
# it at the front of the load path, but only when it exists on disk and is not
# already listed.
lib_dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
if File.directory?(lib_dir) && !$LOAD_PATH.include?(lib_dir)
  $LOAD_PATH.unshift(lib_dir)
end

require 'cohesion'
require 'slop'
require 'csv'
require 'json'

# Command-line interface definition. Options whose name ends in "=" take an
# argument; the single-letter entries at the bottom are plain switches.
opts = Slop.parse(help: true) do
  banner 'Usage: cohesion [options]'

  # Crawl configuration
  on 'url=', 'URL to start crawl from'
  on 'internal_urls=', 'Url patterns to include', as: Array
  on 'external_urls=', 'Url patterns to exclude', as: Array
  on 'seed_urls=', 'Seed urls', as: Array
  on 'crawl_limit=', 'Limit the crawl to a number of urls', as: Integer
  on 'thread_count=', 'Set the number of threads used', as: Integer
  on 'timeout=', 'Sets the timeout for http requests', as: Integer
  on 'cache=', 'Sets the timeout for the cache, leave blank for no cache'

  # Report destination and format
  on 'output=', 'Path to output data to'
  on 'output_format=', 'Output format, csv or json'

  # Diagnostics
  on 'v', 'verbose', 'Display crawl information'
  on 'd', 'debug', 'Display debug information'
  on 'w', 'web_statistics', 'Start web stats server'
end

# Writes the failure report to +path+ as a JSON array.
#
# @param path [String] file to create or overwrite
# @param report [Array<Hash>] entries with :error_page and :inbound_links keys
def write_json_report(path, report)
  File.open(path, 'w') { |file| file.write(report.to_json) }
end

# Writes the failure report to +path+ as CSV: a header row followed by one row
# per (failing url, linking page) pair.
#
# @param path [String] file to create or overwrite
# @param report [Array<Hash>] entries with :error_page and :inbound_links keys
def write_csv_report(path, report)
  CSV.open(path, 'wb') do |csv|
    csv << ['404 Url', 'Page that contains link']
    report.each do |entry|
      # Array() lets an entry with no recorded inbound links (nil) contribute
      # zero rows instead of raising.
      Array(entry[:inbound_links]).each { |link| csv << [entry[:error_page], link] }
    end
  end
end

# Entry point: run the check from --url, optionally write a report of the
# failures, and exit with status 0 when there are none and 1 otherwise.
# Without --url the usage text is shown.
if opts[:url]
  # Forward every option the user supplied, except the start url itself.
  options = opts.to_hash.delete_if { |key, value| value.nil? || key == :url }

  failures = Cohesion::Check.site(opts[:url], options)
  exit(true) if failures.count.zero?

  if opts[:output]
    report = failures.map do |failure|
      { error_page: failure[:issue][:url], inbound_links: failure[:inbound] }
    end

    # Default to json in a local variable rather than assigning back into the
    # parsed options object.
    format = opts[:output_format] || 'json'
    case format
    when 'json' then write_json_report(opts[:output], report)
    when 'csv'  then write_csv_report(opts[:output], report)
    else
      # Previously an unrecognised format wrote nothing and said nothing.
      warn "Unsupported output format '#{format}' (expected csv or json); no report written"
    end
  end

  exit(false)
else
  # No start url given: show the usage text instead of a blank line.
  puts opts.to_s
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
cohesion-1.0.0 bin/cohesion
cohesion-0.0.7 bin/cohesion