lib/www/mechanize/page.rb in mechanize-0.9.2 vs lib/www/mechanize/page.rb in mechanize-0.9.3

- old
+ new

@@ -19,24 +19,32 @@ # class Page < WWW::Mechanize::File extend Forwardable attr_accessor :mech - attr_accessor :encoding def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil) @encoding = nil - response.each do |header,v| + + method = response.respond_to?(:each_header) ? :each_header : :each + response.send(method) do |header,v| next unless v =~ /charset/i - @encoding = v.split('=').last.strip + encoding = v.split('=').last.strip + @encoding = encoding unless encoding == 'none' end + + # Force the encoding to be 8BIT so we can perform regular expressions. + # We'll set it to the detected encoding later + body.force_encoding('ASCII-8BIT') if defined?(Encoding) && body + @encoding ||= Util.detect_charset(body) - body = Util.to_native_charset(body, @encoding) rescue body super(uri, response, body, code) @mech ||= mech + @encoding = nil if html_body =~ /<meta[^>]*charset[^>]*>/i + raise Mechanize::ContentTypeError.new(response['content-type']) unless response['content-type'] =~ /^(text\/html)|(application\/xhtml\+xml)/i @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil end @@ -44,19 +52,31 @@ @title ||= if parser && search('title').inner_text.length > 0 search('title').inner_text end end + def encoding=(encoding) + @encoding = encoding + + if @parser && @parser.encoding.downcase != encoding.downcase + # lazy reinitialize the parser with the new encoding + @parser = nil + end + end + + def encoding + parser.respond_to?(:encoding) ? parser.encoding : nil + end + def parser return @parser if @parser if body && response - html_body = body.length > 0 ? body : '<html></html>' - if WWW::Mechanize.html_parser == Nokogiri::HTML - @parser = Mechanize.html_parser.parse(html_body, nil, @encoding) + if mech.html_parser == Nokogiri::HTML + @parser = mech.html_parser.parse(html_body, nil, @encoding) else - @parser = Mechanize.html_parser.parse(html_body) + @parser = mech.html_parser.parse(html_body) end end @parser end @@ -116,12 +136,13 @@ def meta @meta ||= search('meta').map do |node| next unless node['http-equiv'] && node['content'] (equiv, content) = node['http-equiv'], node['content'] if equiv && equiv.downcase == 'refresh' - if content && content =~ /^\d+\s*;\s*url\s*=\s*'?([^\s']+)/i - node['href'] = $1 + Meta.parse(content, uri) do |delay, href| + node['delay'] = delay + node['href'] = href Meta.new(node, @mech, self) end end end.compact end @@ -137,9 +158,19 @@ end def iframes @iframes ||= search('iframe').map { |node| Frame.new(node, @mech, self) } + end + + private + + def html_body + if body + body.length > 0 ? body : '<html></html>' + else + '' + end end end end end