Sha256: 9fa1bbcbb70cc4a2c4da9fbff6d4669486e5f7df0740514cbc0da4388a13751b

Contents?: true

Size: 1.82 KB

Versions: 1

Compression:

Stored size: 1.82 KB

Contents

require 'nokogiri'
require 'httparty'

module RapGenius
  module Scraper
    # Custom HTTParty parser that parses the returned body with Nokogiri
    class NokogiriParser < HTTParty::Parser
      SupportedFormats.merge!('text/html' => :html)

      def html
        Nokogiri::HTML(body)
      end
    end

    # HTTParty client
    #
    # Sets some useful defaults for all of our requests.
    #
    # See Scraper#fetch
    class Client
      include HTTParty

      format   :html
      parser   NokogiriParser
      base_uri 'http://rapgenius.com'
      headers  'User-Agent' => "rapgenius.rb v#{RapGenius::VERSION}"

      # Perform a search
      #
      # query - Query string
      #
      # Note: Currently only supports Song searching
      #
      # Returns a non-parsed (i.e., plaintext) response body
      def self.search(query)
        response = get("/search/quick", query: {q: query}, headers: default_search_headers)
        response.body
      end

      private

      # Default headers for a search request
      #
      # Tells RapGenius we only want Javascript so we get plaintext results back.
      def self.default_search_headers
        {
          'X-Requested-With' => 'XMLHttpRequest',
          'Referer'          => base_uri,
          'Accept'           => 'application/x-javascript,text/javascript'
        }
      end
    end

    BASE_URL = Client.base_uri + "/".freeze

    attr_reader :url

    def url=(url)
      unless url =~ /^https?:\/\//
        @url = BASE_URL + url.gsub(/^\//, '')
      else
        @url = url
      end
    end

    def document
      @document ||= fetch(@url)
    end

    private

    def fetch(url)
      response = Client.get(url)

      if response.code != 200
        raise ScraperError, "Received a #{response.code} HTTP response"
      end

      response.parsed_response
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
rapgenius-0.1.0 lib/rapgenius/scraper.rb