lib/maxwell/converter.rb in maxwell-0.2.0 vs lib/maxwell/converter.rb in maxwell-0.3.0
- old
+ new
@@ -1,22 +1,45 @@
require 'nokogiri'
require 'httpclient'
-class Maxwell
- module Converter
- def self.execute(url)
- client = HTTPClient.new(
- default_header: {
- "User-Agent" => "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
- }
- )
+require 'nokogiri'
+require 'capybara'
+require 'capybara/poltergeist'
- html = begin
- client.get_content(url)
- rescue
- ""
+module Maxwell
+ class Converter
+ @user_agent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
+ class << self
+ def call(url, use_poltergeist=false)
+ use_poltergeist ? call_with_js(url) : call_without_js(url)
end
- Nokogiri::HTML(html)
+ def call_without_js(url)
+ client = HTTPClient.new(
+ default_header: {
+ "User-Agent" => @user_agent
+ }
+ )
+
+ html = begin
+ client.get_content(url)
+ rescue
+ ""
+ end
+
+ Nokogiri::HTML(html)
+ end
+
+ def call_with_js(url)
+ Capybara.register_driver :poltergeist do |app|
+ Capybara::Poltergeist::Driver.new(app, { js_errors: false, timeout: 1000 })
+ end
+ Capybara.default_selector = :xpath
+ session = Capybara::Session.new(:poltergeist)
+
+ session.driver.headers = { 'User-Agent' => @user_agent }
+ session.visit url
+ Nokogiri::HTML(session.html)
+ end
end
end
end