lib/image_scraper.rb in image_scraper-0.1.0 vs lib/image_scraper.rb in image_scraper-0.1.1
- old
+ new
@@ -1,80 +1,8 @@
require 'pp'
require 'rails'
require 'open-uri'
require 'nokogiri'
-module ImageScraper
- class Client
- attr_accessor :url, :convert_to_absolute_url, :include_css_images, :include_css_data_images, :doc
-
- def initialize(url,options={})
- options.reverse_merge!(:convert_to_absolute_url=>true,:include_css_images=>true, :include_css_data_images=>false)
- @url = url
- @convert_to_absolute_url = options[:convert_to_absolute_url]
- @include_css_images = options[:include_css_images]
- @include_css_data_images = options[:include_css_data_images]
- @doc = Nokogiri::HTML(open url)
- end
-
- def image_urls
- images = page_images
- images += stylesheet_images if include_css_images
- images
- end
-
- def page_images
- urls = []
- doc.xpath("//img").each do |img|
- image = img["src"]
- image = ImageScraper::Util.absolute_url(url,image) if convert_to_absolute_url
- urls << image
- end
- urls
- end
-
- def stylesheet_images
- images = []
- stylesheets.each do |stylesheet|
- file = open(stylesheet)
- css = file.string rescue IO.read(file)
-
- images += css.scan(/url\((.*?)\)/).collect do |image_url|
- if image_url.include?("data:image") and @include_css_data_images
- image_url[0]
- else
- @convert_to_absolute_url ? ImageScraper::Util.absolute_url(url,image_url[0]) : image_url
- end
- end
- end
- images
- end
-
- def stylesheets
- doc.xpath('//link[@rel="stylesheet"]').collect do |stylesheet|
- ImageScraper::Util.absolute_url(url,stylesheet['href'])
- end
- end
- end
-
- module Util
- def self.absolute_url(url,asset=nil)
- return domain(url) + path(url) if asset.nil? and asset.include("://")
- return asset if asset.include?("://")
- return domain(url)+asset if asset[0]=="/"
- return domain(url) =~ /\/$/ ? domain(url)+asset : domain(url)+"/"+asset
- end
-
- def self.domain(url)
- uri = URI.parse(url)
- "#{uri.scheme}://#{uri.host}"
- end
-
- def self.path(url)
- uri = URI.parse(url)
- uri.path
- end
- end
-
- class Railtie < Rails::Railtie
- end
-end
+require 'image_scraper/railtie'
+require 'image_scraper/util'
+require 'image_scraper/client'
\ No newline at end of file