module WebsiteInspector
  # Fetches a single web page with Mechanize and searches the different parts
  # of that page (title, markup, source, links, images, forms) for a list of
  # search terms, printing every hit to standard output.
  #
  # All state is kept in class-level instance variables that are populated by
  # Website.start; the other class methods expect start to have been called.
  class Website
    include Methadone::CLILogging
    include Methadone::Main
    include Methadone::SH

    # NOTE: these are instance-level accessors. The class methods below store
    # their state on the class itself, so they do not populate these.
    attr_accessor :search_terms, :url, :page, :title, :html, :body, :links, :site_data

    # Matches a leading "scheme://" so we can tell whether a URL needs one added.
    SCHEME_PREFIX = %r{\A[a-z][a-z0-9+.\-]*://}i

    # Splits text into chunks of at most 81 characters that start on a
    # non-space character and end just before whitespace or end of line.
    # The {0,80} lower bound lets a one-character token on its own line count.
    SECTION_PATTERN = /\S.{0,80}(?!\S)/

    # Downloads the page and caches each part of it for later searching.
    #
    # @param website_url [String] address of the page; the scheme may be omitted
    # @param search_terms [Array<#to_s>] terms to look for
    # @return [Object] whatever the page's +forms+ call returns (last assignment)
    def self.start(website_url, search_terms)
      agent = Mechanize.new
      @search_terms = clean_input(search_terms)
      @regex_search = build_search_regex(@search_terms)
      @website_url = validate_url(website_url)

      @site_data = {}
      @site_data["PAGE"] = @page = agent.get(@website_url) # Mechanize Page
      @site_data["TITLE"] = @title = @page.title # String
      @site_data["HTML/XML"] = @html = @page.parser # Nokogiri HTML
      @site_data["WEBPAGE SOURCE"] = @source = @page.body # String
      @site_data["LINKS"] = @links = @page.links # Array
      @site_data["IMAGES"] = @images = @page.images
      @site_data["IMAGE URLS"] = @image_urls = @page.image_urls
      @site_data["PAGE FORMS"] = @forms = @page.forms
    end

    # Turns user input into a URI, defaulting to http:// when no scheme is given.
    #
    # Prefixing the scheme before parsing (rather than stuffing the whole input
    # into the host component) keeps inputs such as "example.com/path" or
    # "localhost:3000" from raising URI::InvalidComponentError.
    #
    # @param website_url [String] raw URL, possibly with a trailing comma
    # @return [URI::Generic] parsed URI (URI::HTTP when the scheme was added)
    # @raise [ArgumentError] if nothing is left after cleaning the input
    # @raise [URI::InvalidURIError] if the cleaned input is not a valid URI
    def self.validate_url(website_url)
      site = website_url.strip.chomp(",") # get rid of trailing commas
      raise ArgumentError, "website URL is empty" if site.empty?

      site = "http://#{site}" unless site =~ SCHEME_PREFIX
      URI(site)
    end

    # Converts every search term to a String and strips out commas.
    #
    # @param search_terms [Array<#to_s>] raw terms
    # @return [Array<String>] cleaned terms, same length and order as the input
    def self.clean_input(search_terms)
      search_terms.map { |term| term.to_s.delete(",") }
    end

    # Scans +data+ chunk by chunk and prints every chunk containing a search
    # term together with the matched terms, or a single "no matches" line.
    #
    # @param data [String] text to search
    # @return [nil]
    def self.search(data)
      found_any = false

      data.scan(SECTION_PATTERN).each do |section|
        matches = section.scan(@regex_search)
        next if matches.empty?

        found_any = true
        puts "While scanning: '#{section}'"
        puts "found: #{matches.join(', ')}\n"
      end

      puts "No matches found for: #{@search_terms.join(', ')}\n" unless found_any
    end

    # Prints the banner that introduces the results for one page section.
    #
    # @param section_name [#to_s] label of the section about to be searched
    # @return [nil]
    def self.console_print(section_name)
      puts "---------------------------\nSEARCHING #{section_name}"
    end

    # Searches every cached part of the page, one banner per part.
    #
    # @return [Hash] the cached site data
    def self.search_all
      @site_data.each do |section_name, content|
        console_print section_name.to_s
        search content.to_s.downcase
      end
    end

    # Searches a single cached part of the page, identified by its label
    # (for example "TITLE" or "LINKS"). Array values are searched per element.
    #
    # @param key [String] label used as the key in the cached site data
    def self.search_type(key)
      console_print key.to_s
      value = @site_data[key]

      if value.is_a?(Array)
        value.each { |item| search item.to_s.downcase }
      else
        search value.to_s.downcase
      end
    end

    # Builds one case-insensitive regex matching any of the non-empty terms.
    #
    # Case-insensitivity is required because the data is downcased before it is
    # searched, so a term containing uppercase letters could otherwise never
    # match. Empty terms are skipped because they would match everywhere.
    #
    # @param terms [Array<String>] cleaned search terms
    # @return [Regexp]
    def self.build_search_regex(terms)
      usable_terms = terms.reject(&:empty?)
      Regexp.new(Regexp.union(usable_terms).source, Regexp::IGNORECASE)
    end
    private_class_method :build_search_regex
  end
end