lib/www/mechanize/page.rb in mechanize-0.9.2 vs lib/www/mechanize/page.rb in mechanize-0.9.3
- old
+ new
@@ -19,24 +19,32 @@
#
class Page < WWW::Mechanize::File
extend Forwardable
attr_accessor :mech
- attr_accessor :encoding
# Builds a Page from a fetched response. In this diff view "-" lines are the
# 0.9.2 code, "+" lines the 0.9.3 code, and unprefixed lines are unchanged.
#
# uri, response, body and code are forwarded to super; mech is kept in @mech
# unless @mech is already set.
# Raises Mechanize::ContentTypeError when the response's content-type does
# not match the HTML/XHTML pattern checked below.
# NOTE(review): response defaults to nil but is dereferenced unconditionally
# (header iteration and response['content-type']), so a nil response raises.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
@encoding = nil
- response.each do |header,v|
+
# Iterate headers with each_header when the response provides it, else each.
+ method = response.respond_to?(:each_header) ? :each_header : :each
+ response.send(method) do |header,v|
# Only header values that mention "charset" are of interest.
next unless v =~ /charset/i
- @encoding = v.split('=').last.strip
# Take the text after the last '='; the new code ignores a charset of 'none'.
+ encoding = v.split('=').last.strip
+ @encoding = encoding unless encoding == 'none'
end
+
+ # Force the encoding to be 8BIT so we can perform regular expressions.
+ # We'll set it to the detected encoding later
+ body.force_encoding('ASCII-8BIT') if defined?(Encoding) && body
+
# Fall back to charset detection on the body when no header supplied one.
@encoding ||= Util.detect_charset(body)
# The old code converted the body here and kept the original on any error;
# the new code passes the body through unconverted.
- body = Util.to_native_charset(body, @encoding) rescue body
super(uri, response, body, code)
@mech ||= mech
# Discard the encoding found above when the body has a <meta> tag mentioning
# charset -- presumably so the parser uses the document's own declaration;
# TODO confirm.
+ @encoding = nil if html_body =~ /<meta[^>]*charset[^>]*>/i
+
# NOTE(review): in the pattern below `^` binds only to the text/html
# alternative; application/xhtml+xml can match anywhere in the value.
raise Mechanize::ContentTypeError.new(response['content-type']) unless
response['content-type'] =~ /^(text\/html)|(application\/xhtml\+xml)/i
# Reset every lazily built cache.
@parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
end
@@ -44,19 +52,31 @@
@title ||= if parser && search('title').inner_text.length > 0
search('title').inner_text
end
end
# Writer added in 0.9.3 (it replaces the removed attr_accessor :encoding).
# Stores the new encoding in @encoding and, when a parser has already been
# built with a different encoding (compared case-insensitively), drops it so
# the next call to #parser rebuilds it.
# NOTE(review): downcase is called on both @parser.encoding and the argument,
# so a nil on either side would raise -- confirm callers never pass nil.
+ def encoding=(encoding)
+ @encoding = encoding
+
+ if @parser && @parser.encoding.downcase != encoding.downcase
+ # lazy reinitialize the parser with the new encoding
+ @parser = nil
+ end
+ end
+
# Reader added in 0.9.3. Returns the encoding reported by the parser object,
# or nil when the parser does not respond to :encoding. It reads from
# #parser rather than @encoding, so calling it may build the parser.
+ def encoding
+ parser.respond_to?(:encoding) ? parser.encoding : nil
+ end
+
# Returns the parsed document, building and memoizing it in @parser on first
# use. Nothing is built (and nil is returned) unless both body and response
# are present.
# In this diff the old code computed html_body inline and asked the
# WWW::Mechanize / Mechanize class for the parser; the new code uses the
# html_body helper and the html_parser of this page's mech instance.
# NOTE(review): mech may be nil (the constructor defaults it to nil), in
# which case mech.html_parser would raise -- confirm callers always set it.
def parser
return @parser if @parser
if body && response
- html_body = body.length > 0 ? body : '<html></html>'
- if WWW::Mechanize.html_parser == Nokogiri::HTML
- @parser = Mechanize.html_parser.parse(html_body, nil, @encoding)
# Only the Nokogiri::HTML parser is handed the extra nil and @encoding
# arguments; any other parser gets just the body.
+ if mech.html_parser == Nokogiri::HTML
+ @parser = mech.html_parser.parse(html_body, nil, @encoding)
else
- @parser = Mechanize.html_parser.parse(html_body)
+ @parser = mech.html_parser.parse(html_body)
end
end
@parser
end
@@ -116,12 +136,13 @@
# Returns the memoized list built from the page's <meta> nodes whose
# http-equiv is "refresh" (case-insensitive). Nodes lacking http-equiv or
# content, and non-refresh nodes, yield nil and are removed by compact.
# In this diff the old code matched the refresh content with an inline
# regex and set node['href']; the new code delegates to Meta.parse and sets
# both node['delay'] and node['href'] before building the Meta object.
def meta
@meta ||= search('meta').map do |node|
next unless node['http-equiv'] && node['content']
(equiv, content) = node['http-equiv'], node['content']
if equiv && equiv.downcase == 'refresh'
- if content && content =~ /^\d+\s*;\s*url\s*=\s*'?([^\s']+)/i
- node['href'] = $1
# The map entry is whatever Meta.parse returns -- presumably the block's
# value (the new Meta) or nil when nothing parses; TODO confirm.
+ Meta.parse(content, uri) do |delay, href|
+ node['delay'] = delay
+ node['href'] = href
Meta.new(node, @mech, self)
end
end
end.compact
end
@@ -137,9 +158,19 @@
end
# Returns the memoized list of Frame objects, one per <iframe> node found by
# search, each constructed with the node, @mech and this page.
def iframes
@iframes ||=
search('iframe').map { |node| Frame.new(node, @mech, self) }
+ end
+
+ private
+
# Helper added in 0.9.3, placed after the `private` marker.
# Returns the text handed to the parser and scanned for a <meta> charset:
# the body itself when it is non-empty, the placeholder '<html></html>' when
# the body is empty, and '' when there is no body at all.
+ def html_body
+ if body
+ body.length > 0 ? body : '<html></html>'
+ else
+ ''
+ end
end
end
end
end