Sha256: 8969ebcfb71f75af722ee0dfe5e4ce799a40f5626bd9e7fc629c59c497f1c20f

Contents?: true

Size: 1.67 KB

Versions: 3

Compression:

Stored size: 1.67 KB

Contents

require 'json'
require 'uri'
require 'faraday'
require 'faraday/follow_redirects'
require 'nokogiri'
require 'manifique/metadata'
require 'manifique/errors'

module Manifique
  # Fetches a web page, discovers its linked web app manifest (if any) and
  # feeds both documents into a Metadata object.
  class WebClient

    # @param options [Hash] client options; only +:url+ (the address of the
    #   page to inspect) is read by this class.
    def initialize(options={})
      @options = options
      @url = options[:url]
      @metadata = Metadata.new(url: @url)
    end

    # Downloads the page at the configured URL, then loads the linked web
    # manifest (when one is discovered) followed by the HTML document into
    # the metadata object.
    #
    # @return [Metadata] the populated metadata object
    # @raise [Manifique::Error] when the manifest request fails
    # @raise [JSON::ParserError] when the manifest body is not valid JSON
    def fetch_metadata
      fetch_website

      manifest = fetch_web_manifest
      @metadata.load_from_web_manifest(manifest) if manifest
      @metadata.load_from_html(@html)

      @metadata
    end

    private

    # Requests the page and stores the parsed document in @html.
    #
    # NOTE(review): every StandardError is swallowed here, including the
    # Manifique::Error raised by #do_get_request, so a failed page request
    # leaves @html unset. Kept as-is because it looks like deliberate
    # best-effort behaviour -- confirm with the callers.
    #
    # @return [Object, false] the parsed document, or false on any failure
    def fetch_website
      res = do_get_request @url
      @html = Nokogiri::HTML(res.body)
    rescue
      false
    end

    # Downloads and parses the manifest referenced by the current document.
    #
    # @return [Object, false] the parsed JSON, or false when the document
    #   links no manifest
    def fetch_web_manifest
      href = discover_web_manifest_url(@html)
      return false unless href

      res = do_get_request resolve_manifest_url(href)

      JSON.parse(res.body)
    end

    # Turns the href of the manifest link into an absolute URL.
    #
    # Absolute http(s) URLs are passed through untouched. Everything else is
    # resolved against the page URL with URI.join, so root-relative
    # ("/manifest.json"), document-relative ("manifest.json") and
    # protocol-relative ("//host/manifest.json") references all end up
    # pointing at the right place.
    #
    # @param href [String] the raw href attribute value
    # @return [String] absolute manifest URL
    def resolve_manifest_url(href)
      return href if href.match?(%r{\Ahttps?://})

      URI.join(@url, href).to_s
    rescue URI::InvalidURIError
      # Unparseable input: fall back to plain string concatenation so the
      # request is still attempted instead of failing on URL parsing.
      [@url.sub(%r{/\z}, ''), href.sub(%r{\A/}, '')].join('/')
    end

    # Performs a GET request, following redirects.
    #
    # @param url [String] the address to request
    # @return [Object] the Faraday response when its status is below 400
    # @raise [Manifique::Error] for statuses of 400 and above, and for
    #   connection, timeout and SSL failures
    def do_get_request(url)
      conn = Faraday.new do |faraday|
        faraday.response :follow_redirects
        faraday.adapter Faraday.default_adapter
      end
      res = conn.get url
      return res if res.status < 400

      raise Manifique::Error.new("Failed with HTTP status #{res.status}", "http_#{res.status}", url)
    rescue Faraday::ConnectionFailed, Faraday::TimeoutError, Faraday::SSLError
      raise Manifique::Error.new("Failed to connect", "connection_failed", url)
    end

    # Looks up the href of the first <link rel="manifest"> element.
    #
    # @param html [#at_css, nil] the parsed document (nil when the page
    #   could not be fetched)
    # @return [String, false] the href with surrounding whitespace removed,
    #   or false when the document, the element or a non-blank href is
    #   missing
    def discover_web_manifest_url(html)
      link = html&.at_css("link[rel=manifest]")
      href = link && link["href"]
      href = href.to_s.strip

      href.empty? ? false : href
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
manifique-1.1.0 lib/manifique/web_client.rb
manifique-1.0.1 lib/manifique/web_client.rb
manifique-1.0.0 lib/manifique/web_client.rb