Sha256: 1304963455942d1f3f0b8f034603bfc89147b8f4ebc3c4628ccefa14aef03a9a

Contents?: true

Size: 1.09 KB

Versions: 1

Compression:

Stored size: 1.09 KB

Contents

require 'nokogiri'
require 'httparty'

module RapGenius
  module Scraper
    # HTTParty parser subclass that turns HTML responses into Nokogiri
    # documents.
    class NokogiriParser < HTTParty::Parser
      # Register the text/html content type against the :html format, which
      # is handled by #html below.
      SupportedFormats.update('text/html' => :html)

      # Handler for the :html format: hands +body+ to Nokogiri's HTML parser
      # and returns the result.
      def html
        Nokogiri::HTML.parse(body)
      end
    end

    # HTTParty client
    #
    # Sets some useful defaults for all of our requests: the :html format,
    # NokogiriParser as the response parser, http://rapgenius.com as the base
    # URI, and a User-Agent header naming this library and its version.
    #
    # See Scraper#fetch
    class Client
      include HTTParty

      format   :html
      parser   NokogiriParser
      base_uri 'http://rapgenius.com'
      # RapGenius::VERSION is defined outside this file and is interpolated
      # when this class body is evaluated, so it has to be loaded first.
      headers  'User-Agent' => "rapgenius.rb v#{RapGenius::VERSION}"
    end

    # Root URL, with a trailing slash, that relative paths are appended to.
    #
    # The parentheses matter: without them `.freeze` binds to the "/" literal
    # alone and the resulting constant string stays mutable.
    BASE_URL = (Client.base_uri + "/").freeze

    # Reader for the URL stored by #url=.
    attr_reader :url

    # Sets the URL that #document will fetch.
    #
    # A value that already starts with "http://" or "https://" is stored
    # as-is; anything else is treated as a path and appended to BASE_URL.
    # Any previously memoized document is discarded so that the next call to
    # #document fetches the newly assigned URL instead of returning the page
    # loaded for the old one.
    #
    # url - an absolute URL or a path relative to BASE_URL
    def url=(url)
      # \A pins the scheme check to the very start of the string; ^ would
      # also match at the start of any later line of a multi-line value.
      resolved = if url =~ %r{\Ahttps?://}
                   url
                 else
                   BASE_URL + url
                 end

      @document = nil
      @url = resolved
    end

    # Returns the document for @url, fetching it on first use and handing
    # back the stored value on later calls (for as long as it is truthy).
    def document
      return @document if @document

      @document = fetch(@url)
    end

    private

    # Issues a GET for +url+ through Client and returns the response's
    # parsed_response (presumably a Nokogiri document, given the parser
    # configured on Client).
    #
    # Raises ScraperError when the response code is anything other than 200.
    def fetch(url)
      reply = Client.get(url)
      raise ScraperError, "Received a #{reply.code} HTTP response" unless reply.code == 200

      reply.parsed_response
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
rapgenius-0.0.3 lib/rapgenius/scraper.rb