Sha256: 781e10e75ace710974ee967fb3c21d31ac6fbea2c45751959a76ad02afa8adbf

Contents?: true

Size: 1.44 KB

Versions: 1

Compression:

Stored size: 1.44 KB

Contents

require "ImageScraper/version"
require "nokogiri"
require "open-uri"
# Downloads every <img> referenced by a web page into a local folder.
#
# All state (@url, @folder, @page, @files, @imgLinks) lives on the module
# itself, so only one scrape can be configured at a time. Typical use:
#
#   ImageScraper.initialize("http://example.com/gallery/index.html", "Images")
#   ImageScraper.Scrap
module ImageScraper

	# File names recognised as "the page itself" at the end of a URL. They are
	# stripped by getNewUrl so that relative image paths resolve against the
	# directory that contains the page.
	PAGE_FILE = /\A[^?#]+\.(?:html?|php)\z/i
	private_constant :PAGE_FILE

	# The "constructor": remembers what to scrape and where to store it.
	#
	# url    - address of the page whose images should be fetched.
	# folder - directory the images are written to (created on demand by
	#          getImgNames).
	def self.initialize(url, folder = "Images")
		@url = url
		@folder = folder
	end

	# Fetches the page at @url and parses it with Nokogiri.
	#
	# Returns the parsed document (also stored in @page).
	def self.getPage
		# URI.open rather than Kernel#open: open-uri stopped patching
		# Kernel#open in Ruby 3.0. The block form closes the handle after reading.
		html = URI.open(@url) { |io| io.read }
		@page = Nokogiri::HTML(html)
	end

	# Strips a trailing page file name ("index.html", "view.php", ...) from
	# @url so that it points at the containing directory.
	#
	# @url is reassigned rather than modified in place, so the string handed to
	# initialize is left untouched (and may be frozen).
	#
	# Returns the removed file name, or "" when @url does not end in one.
	def self.getNewUrl
		dir, slash, name = @url.rpartition("/")
		# A "directory" ending in "/" means the last segment is the host part of
		# "scheme://host", not a file name.
		return "" if dir.end_with?("/") || name !~ PAGE_FILE

		@url = dir + slash
		name
	end

	# Builds the list of absolute image URLs for the current page.
	#
	# Each src is resolved against the page's directory, so relative,
	# root-relative ("/img/a.png") and absolute ("http://cdn/a.png") sources all
	# work. <img> tags without a usable src are skipped, exactly as in
	# getImgNames, which keeps @imgLinks and @files index-aligned.
	#
	# Returns the Array of URL strings (also stored in @imgLinks).
	def self.getImgLinks
		self.getNewUrl
		@url += "/" unless @url.end_with?("/")
		base = @url

		@imgLinks = image_sources.map do |src|
			begin
				URI.join(base, src).to_s
			rescue URI::Error
				# Unparseable base or src: fall back to plain concatenation.
				base + src
			end
		end
	end

	# Downloads every URL in @imgLinks to the matching path in @files.
	# getImgNames and getImgLinks must have been called first.
	#
	# Returns the number of images processed.
	def self.download
		@imgLinks.zip(@files).each do |link, path|
			puts "#{link} found"
			# Fetch before opening the target so a failed request does not leave an
			# empty file behind; binary mode keeps the image bytes intact.
			data = URI.open(link, "rb") { |io| io.read }
			File.open(path, "wb") { |out| out.write(data) }
			puts "#{path} downloaded"
		end
		@imgLinks.length
	end

	# Creates the target folder if needed and works out the local file path
	# for every image on the page.
	#
	# The file name is the last path segment of the src with any query string
	# or fragment removed; when nothing usable remains, "image_<index>" is used.
	#
	# Returns the Array of file paths (also stored in @files).
	def self.getImgNames
		unless Dir.exist?(@folder)
			Dir.mkdir(@folder)
			puts "#{@folder} Directory Created"
		end

		@files = image_sources.each_with_index.map do |src, index|
			name = src.split(/[?#]/).first.to_s.split("/").last
			name = "image_#{index}" if name.nil? || name.empty?
			"#{@folder}/#{name}"
		end
	end

	# Runs the whole pipeline: fetch the page, derive file names and image
	# URLs, then download everything.
	def self.Scrap
		self.getPage
		puts "Page Found"
		self.getImgNames
		self.getImgLinks
		self.download
	end

	# Collects the non-empty, whitespace-trimmed src attribute of every <img>
	# on the parsed page. Shared by getImgNames and getImgLinks so both lists
	# always have the same length and order.
	def self.image_sources
		@page.css("img").map { |node| node["src"].to_s.strip }.reject(&:empty?)
	end
	private_class_method :image_sources

end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
ImageScraper-0.0.2 lib/ImageScraper.rb