Sha256: 2c3c441e5652e3756ccfa832f104e87f536406ae6798b9e190472476df703a1a

Contents?: true

Size: 1.34 KB

Versions: 5

Compression:

Stored size: 1.34 KB

Contents

require 'uri'

module LinkThumbnailer

  # Extracts thumbnail-related data (image URLs, title, description and
  # canonical URL) from a parsed HTML document.
  #
  # Every method queries +self+ through +at+, +search+, +css+ or +xpath+, so
  # the module must be mixed into an object that provides that node-query API
  # (presumably a Nokogiri HTML document -- confirm against the including code).
  module Doc

    # URL the document was loaded from. It is the last-resort base used by
    # #img_abs_urls when resolving relative image paths; set by the caller.
    attr_accessor :source_url

    # @return [String, nil] the +href+ attribute of the element matched by
    #   +//head/base+, or nil when there is no such element or attribute.
    def doc_base_href
      base = at('//head/base')
      base['href'] if base
    end

    # @return [Array<String>] the +src+ attribute of every +img+ element,
    #   exactly as written in the markup; images without a +src+ are omitted.
    def img_srcs
      search('//img').map { |img| img['src'] }.compact
    end

    # Absolute URLs of the document's images.
    #
    # A +src+ that already parses as an http(s) URI is returned untouched.
    # Any other +src+ is resolved with URI.join against the first available
    # base, in this order: the +base_url+ argument, the document's
    # <base href> (itself resolved against #source_url when possible), and
    # finally #source_url.
    #
    # A +src+ that cannot be parsed or resolved -- malformed, or relative with
    # no usable absolute base -- is skipped, so a single bad attribute never
    # aborts the extraction of the remaining images.
    #
    # @param base_url [String, URI, nil] base that overrides the document's own
    # @return [Array<URI>] one URI per image that could be resolved
    def img_abs_urls(base_url = nil)
      # The base does not depend on the image, so look it up only once.
      base = base_url || resolved_doc_base_href || source_url

      img_srcs.each_with_object([]) do |src, urls|
        begin
          uri = URI(src)
          if uri.is_a?(URI::HTTP)
            urls << uri
          elsif base
            urls << URI.join(base, src)
          end
        rescue URI::Error
          # Covers URI::InvalidURIError (malformed src or base) and
          # URI::BadURIError (relative src joined to a relative base).
          next
        end
      end
    end

    # @return [String] the text of the document's +title+ element(s) with
    #   surrounding whitespace removed.
    def title
      css('title').text.strip
    end

    # Best-effort page description.
    #
    # Prefers the +content+ of a +meta+ element whose +name+ is "description"
    # (matched case-insensitively). Otherwise falls back to the text of the
    # first +body p+ element that has no +style+ attribute and no element
    # children.
    #
    # @return [String, nil] the stripped description, or nil when neither
    #   source is available.
    def description
      meta = xpath("//meta[translate(@name,'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz') = 'description' and @content]").first
      return meta.attributes['content'].value.strip if meta

      paragraph = css('body p').find do |node|
        !node.has_attribute?('style') && node.first_element_child.nil?
      end
      paragraph.text.strip if paragraph
    end

    # @return [String, nil] the stripped +href+ of the +link+ element whose
    #   +rel+ is "canonical" (matched case-insensitively), or nil when the
    #   document declares none.
    def canonical_url
      link = xpath("//link[translate(@rel, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ','abcdefghijklmnopqrstuvwxyz') = 'canonical' and @href]").first
      link.attributes['href'].value.strip if link
    end

    private

    # The document's <base href>, made absolute against #source_url when both
    # are present (a <base href> may legitimately be relative, e.g. "/").
    #
    # @return [URI, String, nil] the resolved base; the raw href when it cannot
    #   be resolved; nil when the document has no <base href>.
    def resolved_doc_base_href
      href = doc_base_href
      return href unless href && source_url

      URI.join(source_url, href)
    rescue URI::Error
      # source_url is unusable; the href may still be absolute on its own.
      href
    end

  end

end

Version data entries

5 entries across 5 versions & 2 rubygems

Version Path
link_thumbnailer-1.1.1 lib/link_thumbnailer/doc.rb
link_thumbnailer-1.1.0 lib/link_thumbnailer/doc.rb
rcarvalho-link_thumbnailer-1.0.9.1 lib/link_thumbnailer/doc.rb
link_thumbnailer-1.0.9 lib/link_thumbnailer/doc.rb
link_thumbnailer-1.0.8 lib/link_thumbnailer/doc.rb