lib/rapgenius/scraper.rb in rapgenius-0.0.2 vs lib/rapgenius/scraper.rb in rapgenius-0.0.3
- old
+ new
@@ -1,35 +1,57 @@
require 'nokogiri'
require 'httparty'
module RapGenius
module Scraper
- BASE_URL = "http://rapgenius.com/".freeze
+ # Custom HTTParty parser that parses the returned body with Nokogiri
+ class NokogiriParser < HTTParty::Parser
# Registers the 'text/html' content type under the :html format key.
# merge! mutates the SupportedFormats hash in place.
# NOTE(review): SupportedFormats is not defined in this class, so it presumably
# resolves to the constant inherited from HTTParty::Parser, making this a
# change to a shared hash — confirm.
+ SupportedFormats.merge!('text/html' => :html)
- attr_reader :url
# Handler for the :html format: returns `body` parsed by Nokogiri::HTML.
# NOTE(review): presumably HTTParty calls the method named after the
# response format — confirm against the HTTParty::Parser documentation.
+ def html
+ Nokogiri::HTML(body)
+ end
+ end
+ # HTTParty client
+ #
+ # Sets some useful defaults for all of our requests.
+ #
+ # See Scraper#fetch
+ class Client
+ include HTTParty
# NOTE(review): presumably forces every response to be handled as :html,
# regardless of the Content-Type the server reports — confirm.
+ format :html
# Responses are parsed by the Nokogiri-backed parser class.
+ parser NokogiriParser
# No trailing slash here; BASE_URL appends one.
+ base_uri 'http://rapgenius.com'
# The User-Agent embeds RapGenius::VERSION, which is not defined or required
# in the visible part of this file, so it must be loaded before this class
# body is evaluated.
+ headers 'User-Agent' => "rapgenius.rb v#{RapGenius::VERSION}"
+ end
+
# Root URL that relative paths are appended to in url=: the client's base URI
# followed by a trailing slash.
# The complete string is frozen. Writing `Client.base_uri + "/".freeze` would
# freeze only the "/" literal and leave the resulting constant mutable.
+ BASE_URL = "#{Client.base_uri}/".freeze
+
+ attr_reader :url
+
# Sets @url. A value that does not match /^https?:\/\// is treated as a
# path and prefixed with BASE_URL; a matching value is stored unchanged.
#
# NOTE(review): `^` anchors at the start of any line, not the start of the
# string (`\A`), so a multi-line value with "http://" on a later line is
# stored unchanged.
# NOTE(review): the new `BASE_URL + url` raises TypeError when url is not a
# String (e.g. nil), whereas the old interpolated form converted it with
# to_s — confirm that no caller relied on the old behaviour.
def url=(url)
- if !(url =~ /^https?:\/\//)
- @url = "#{BASE_URL}#{url}"
+ unless url =~ /^https?:\/\//
+ @url = BASE_URL + url
else
@url = url
end
end
# Returns the document fetched from @url, memoized in @document: `||=` only
# calls fetch while @document is nil or false.
# The old version wrapped fetch's result in Nokogiri::HTML here; the new
# version stores fetch's return value directly.
def document
- @document ||= Nokogiri::HTML(fetch(@url))
+ @document ||= fetch(@url)
end
private
+
# Issues a GET request for url (through Client in the new version, through
# HTTParty directly in the old one) and returns the result.
#
# Raises ScraperError when the response code is anything other than 200.
# Returns response.parsed_response (the old version returned response.body).
# NOTE(review): parsed_response is presumably the Nokogiri document built by
# NokogiriParser#html — confirm.
def fetch(url)
- response = HTTParty.get(url)
+ response = Client.get(url)
if response.code != 200
raise ScraperError, "Received a #{response.code} HTTP response"
end
- response.body
+ response.parsed_response
end
-
end
-end
\ No newline at end of file
+end