lib/extensions/http.rb in radiospieler-0.3.7 vs lib/extensions/http.rb in radiospieler-0.3.8
- old
+ new
@@ -1,7 +1,8 @@
require 'net/http'
require 'simple_cache'
+require 'nokogiri'
# The Http module defines a
#
# Http.get(url)
#
@@ -21,22 +22,29 @@
end
nil
end
end
- def get(url, max_age = MaxAge.for(url))
+ def get(url, max_age = nil)
+ body, headers = get_body_and_headers(url, max_age)
+ body
+ end
+
+ def get_body_and_headers(url, max_age = nil)
+ max_age ||= MaxAge.for(url)
+
App.logger.benchmark("[GET] #{url}", :minimum => 20) do
- SimpleCache.cached(url, max_age) do
+ SimpleCache.cached("f-#{url}", max_age) do
App.logger.debug "[GET] #{url}"
- get_(url)
+ get_body_and_headers_(url)
end
end
end
private
- def get_(uri_str, limit = 10)
+ def get_body_and_headers_(uri_str, limit = 10)
raise 'too many redirections' if limit == 0
uri = URI.parse(uri_str)
http = Net::HTTP.new(uri.host, uri.port)
@@ -47,15 +55,47 @@
request = Net::HTTP::Get.new(uri.request_uri)
response = http.request(request)
case response
when Net::HTTPSuccess then
- response.body
+ body, headers = response.body, response.to_hash
+ [ reencode(body, response["Content-Type"]), headers ]
when Net::HTTPRedirection then
location = response['location']
App.logger.debug "redirected to #{location}"
- get_(location, limit - 1)
+ get_body_and_headers_(location, limit - 1)
else
- response.value
+ [ response.value, nil ]
end
+ end
+
+ def reencode(body, content_type)
+ encodings = [ "ISO-8859-1", "UTF-8" ]
+
+ encodings.unshift($1) if content_type =~ /; charset=(\S+)/
+ encodings.unshift(html_encoding(body)) if content_type =~ /html/
+
+ force_valid_encoding body, *encodings
+ end
+
+ def force_valid_encoding(string, *encodings)
+ encodings.each do |enc|
+ next unless enc
+ begin
+ s = string.force_encoding(enc)
+ next unless s.valid_encoding?
+ return s.encode("UTF-8")
+ rescue Encoding::UndefinedConversionError
+ end
+ end
+
+ nil
+ end
+
+ def html_encoding(html)
+ doc = Nokogiri.HTML(html)
+ node = doc.css("meta[http-equiv='Content-Type']").first
+
+ return unless node && node["content"] =~ /; charset=(\S+)/
+ $1
end
end