Sha256: 2c3c441e5652e3756ccfa832f104e87f536406ae6798b9e190472476df703a1a
Contents?: true
Size: 1.34 KB
Versions: 5
Compression:
Stored size: 1.34 KB
Contents
require 'uri'

module LinkThumbnailer
  # Helpers for pulling link-preview data (title, description, canonical URL
  # and image URLs) out of a parsed HTML document.
  #
  # Every method calls +at+, +search+, +css+ or +xpath+ on the receiver, so the
  # module has to be mixed into an object that provides them (presumably a
  # Nokogiri HTML document -- confirm against the code that extends it).
  module Doc
    # Fallback base used to resolve relative image URLs when neither an
    # explicit base nor a usable <base href> is available (presumably the URL
    # the document was fetched from -- set by the caller).
    attr_accessor :source_url

    # @return [String, nil] the +href+ of the document's <head><base> element,
    #   or nil when the element (or its attribute) is missing
    def doc_base_href
      base = at('//head/base')
      base['href'] if base
    end

    # @return [Array<String>] the +src+ attribute of every <img> element that
    #   has one, in document order
    def img_srcs
      search('//img').map { |img| img['src'] }.compact
    end

    # Absolute URLs for every image of the document.
    #
    # HTTP(S) sources are returned as parsed; every other source is resolved
    # against +base_url+, or, when that is nil, against the document's
    # <base href> (itself resolved against +source_url+ when relative), or
    # finally against +source_url+. Sources that cannot be parsed or cannot be
    # resolved (no usable base) are skipped instead of aborting the whole scan.
    #
    # @param base_url [String, URI, nil] explicit base overriding the document's own
    # @return [Array<URI::Generic>]
    def img_abs_urls(base_url = nil)
      base = base_url || img_base_url
      img_srcs.map { |src| absolute_img_url(src, base) }.compact
    end

    # @return [String] the stripped text of the <title> element(s); empty when
    #   there is none
    def title
      css('title').text.strip
    end

    # The page description: the +content+ of the first <meta name="description">
    # (name matched case-insensitively), otherwise the text of the first
    # <body> paragraph that has no +style+ attribute and no child elements.
    #
    # @return [String, nil] nil when neither source yields a candidate
    def description
      meta = xpath("//meta[translate(@name,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz') = 'description' and @content]").first
      return meta.attributes['content'].value.strip if meta

      css('body p').each do |node|
        # Styled paragraphs and paragraphs wrapping other elements are skipped.
        next if node.has_attribute?('style') || node.first_element_child

        return node.text.strip
      end

      nil
    end

    # @return [String, nil] the stripped +href+ of the first
    #   <link rel="canonical"> (rel matched case-insensitively), or nil
    def canonical_url
      link = xpath("//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz') = 'canonical' and @href]").first
      link.attributes['href'].value.strip if link
    end

    private

    # Base the document itself provides for relative image URLs.
    # A relative <base href> (e.g. "/assets/") cannot be handed to URI.join as
    # a base on its own, so it is first resolved against +source_url+.
    def img_base_url
      href = doc_base_href
      return source_url unless href
      return href unless source_url

      URI.join(source_url, href)
    rescue URI::Error
      # Unusable <base href>: fall back to the document's own URL.
      source_url
    end

    # Parses +src+ and makes it absolute against +base+; nil when +src+ is not
    # a valid URI or cannot be resolved.
    def absolute_img_url(src, base)
      uri = URI(src)
      return uri if uri.is_a?(URI::HTTP)

      URI.join(base, src) if base
    rescue URI::Error
      nil
    end
  end
end
Version data entries
5 entries across 5 versions & 2 rubygems