Sha256: 781e10e75ace710974ee967fb3c21d31ac6fbea2c45751959a76ad02afa8adbf
Contents?: true
Size: 1.44 KB
Versions: 1
Compression:
Stored size: 1.44 KB
Contents
require "ImageScraper/version"
require "nokogiri"
require "open-uri"

# Downloads every <img> referenced by a web page into a local folder.
#
# All state (page URL, target folder, parsed page, link list, file list) is
# kept in instance variables of the module object itself, so only one scrape
# can be configured at a time. Typical use:
#
#   ImageScraper.initialize("http://example.com/gallery.html", "Pictures")
#   ImageScraper.Scrap
module ImageScraper
  # Matches a trailing page file name ("index.html", "view.php") at the very
  # end of a URL. The dot is escaped and the whole alternation is anchored, so
  # "html"/"php" appearing elsewhere in the URL is never matched.
  PAGE_FILE_PATTERN = %r{[^/]+\.(?:html|php)\z}
  private_constant :PAGE_FILE_PATTERN

  # Stores the scrape configuration on the module. This is not a real
  # constructor (modules are never instantiated); it must be called explicitly.
  #
  # url    - String address of the page to scrape.
  # folder - String directory the images are saved into (default "Images").
  def self.initialize(url, folder = "Images")
    @url = url
    @folder = folder
  end

  # Fetches the page at @url and stores the parsed Nokogiri document in @page.
  def self.getPage
    @page = Nokogiri::HTML(read_remote(@url))
  end

  # Strips a trailing page file name (see PAGE_FILE_PATTERN) from @url.
  #
  # @url is reassigned rather than modified in place, so the String object the
  # caller handed to .initialize is left untouched.
  #
  # Returns the removed file name, or "" when @url does not end in one.
  def self.getNewUrl
    page_file = @url[PAGE_FILE_PATTERN].to_s
    @url = @url.sub(PAGE_FILE_PATTERN, "")
    page_file
  end

  # Builds @imgLinks: one absolute URL per usable <img src> on the page, in
  # document order (same order and length as the list built by .getImgNames).
  #
  # Relative sources are resolved against the page's directory; absolute,
  # protocol-relative and root-relative sources are resolved by URI.join
  # instead of being blindly appended to the page URL. @url itself is not
  # changed, so .getPage can be called again afterwards.
  def self.getImgLinks
    base = base_url
    @imgLinks = image_sources.map { |src| absolute_link(base, src) }
  end

  # Downloads every URL in @imgLinks to the matching path in @files.
  # Requires .getImgLinks and .getImgNames to have run first.
  def self.download
    @imgLinks.zip(@files).each do |link, path|
      puts "#{link} found"
      # The fetch is evaluated before the file is created, so a failed request
      # does not leave an empty file behind; binwrite avoids newline
      # translation of image bytes.
      File.binwrite(path, read_remote(link))
      puts "#{path} downloaded"
    end
  end

  # Creates the target folder if needed and builds @files: one local path per
  # usable <img src> on the page, in document order.
  def self.getImgNames
    unless File.exist?(@folder)
      Dir.mkdir(@folder)
      puts "#{@folder} Directory Created"
    end
    @files = image_sources.each_with_index.map do |src, index|
      "#{@folder}/#{file_name_for(src, index)}"
    end
  end

  # Runs the whole pipeline: fetch the page, compute local file names and
  # image URLs, then download every image.
  def self.Scrap
    getPage
    puts "Page Found"
    getImgNames
    getImgLinks
    download
  end

  # Reads the full body at +address+; the block form closes the handle.
  def self.read_remote(address)
    URI.open(address, &:read)
  end

  # Returns the non-blank src attribute of every <img> in @page. Both
  # .getImgLinks and .getImgNames use this list so their results stay aligned.
  def self.image_sources
    @page.css("img").map { |img| img["src"].to_s.strip }.reject(&:empty?)
  end

  # Returns @url without a trailing page file name and with a trailing "/".
  def self.base_url
    base = @url.sub(PAGE_FILE_PATTERN, "")
    base.end_with?("/") ? base : "#{base}/"
  end

  # Resolves +src+ against +base+ and returns the result as a String.
  def self.absolute_link(base, src)
    URI.join(base, src).to_s
  rescue URI::Error
    # Values the URI parser rejects fall back to plain concatenation.
    base + src
  end

  # Derives a local file name from +src+: the last non-empty path segment with
  # any query string or fragment removed, or "image_<index>" when there is none.
  def self.file_name_for(src, index)
    path = src.sub(/[?#].*\z/m, "")
    path.split("/").reject(&:empty?).last || "image_#{index}"
  end

  private_class_method :read_remote, :image_sources, :base_url,
                       :absolute_link, :file_name_for
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
ImageScraper-0.0.2 | lib/ImageScraper.rb |