lib/wombat/processing/parser.rb in wombat-2.2.1 vs lib/wombat/processing/parser.rb in wombat-2.3.0
- old
+ new
@@ -16,10 +16,17 @@
module Processing
module Parser
attr_accessor :mechanize, :context, :response_code, :page
# Builds the Mechanize agent stored in @mechanize and, when Wombat.proxy_args
# is set, splats those arguments into the agent's set_proxy call.
#
# Diff note: the removed ("-") line created a bare Mechanize.new. The added
# ("+") lines instead configure the agent in a block that appends a lambda to
# post_connect_hooks. The lambda takes four arguments, uses only the third
# (the response), and sets its content_type to 'text/html' whenever the
# current content_type is nil or empty.
# NOTE(review): judging by the title of the linked Stack Overflow question,
# the intent is to keep Mechanize from returning a File instead of a Page for
# responses that lack a content type -- confirm against Mechanize's behavior.
def initialize
- @mechanize = Mechanize.new
+ # http://stackoverflow.com/questions/6918277/ruby-mechanize-web-scraper-library-returns-file-instead-of-page
+ @mechanize = Mechanize.new { |a|
+ a.post_connect_hooks << lambda { |_,_,response,_|
+ if response.content_type.nil? || response.content_type.empty?
+ response.content_type = 'text/html'
+ end
+ }
+ }
@mechanize.set_proxy(*Wombat.proxy_args) if Wombat.proxy_args
end
def parse(metadata)
@context = parser_for metadata