lib/iconoclasm/extractor.rb in iconoclasm-1.0.0 vs lib/iconoclasm/extractor.rb in iconoclasm-1.0.1

- old
+ new

@@ -8,54 +8,53 @@ base.class_eval { include Iconoclasm::Downloader } end def extract_favicon_from(url, content = nil) catch(:done) do - base_url = base_url_of(url) - extract_favicon_from_head_of(base_url, content) - extract_favicon_from_naive_guess(base_url) - raise Iconoclasm::MissingFavicon.new(base_url) + extract_favicon_from_head_of(url, content) + extract_favicon_from_naive_guess(base_url_of(url)) + raise Iconoclasm::MissingFavicon.new(url) end end private - def extract_favicon_from_head_of(base_url, content = nil) - if document = document_from(base_url, content) + def extract_favicon_from_head_of(url, content = nil) + if document = document_from(url, content) favicon_links = find_favicon_links_in(document) throw(:done, { - :url => href_of(favicon_links.first), + :url => href_of(favicon_links.first, :base_url => base_url_of(url)), :content_type => type_of(favicon_links.first) }) unless favicon_links.empty? end end - def document_from(base_url, content = nil) + def document_from(url, content = nil) if content Nokogiri::XML(content) else - response = get(base_url) - Nokogiri::XML(response.body_str) if response.response_code == 200 + response = get(url) + Nokogiri::XML(response.body) if response.code == 200 end end def extract_favicon_from_naive_guess(base_url) naive_url = "#{base_url}/favicon.ico" response = get(naive_url) - headers = Iconoclasm::Headers.new(response.header_str) - if response.response_code == 200 + headers = Iconoclasm::Headers.new(response.headers) + if response.code == 200 throw(:done, { :url => naive_url, - :content_length => header.content_length, + :content_length => headers.content_length, :content_type => headers.content_type, :data => response.body_str }) end end def find_favicon_links_in(document) - document.xpath('//link[favicon_link(.)]', Class.new { + document.css('link:favicon_link', Class.new { def favicon_link(node_set) node_set.find_all { |node| node['rel'] && node['rel'] =~ /^(?:shortcut\s)?icon$/i } end }.new) end @@ -63,12 +62,18 @@ def base_url_of(url) uri = URI.parse(url) "#{uri.scheme}://#{uri.host}" end - def href_of(node) + def href_of(node, options = {}) href = normal_node_attributes(node)['href'] - href.value if href + if href + relative?(href.value) ? "#{options[:base_url]}#{href.value}" : href.value + end + end + + def relative?(href) + href =~ /^[\.\/]/ end def type_of(node) type = normal_node_attributes(node)['type'] type.value if type \ No newline at end of file