lib/spieker/link_scraper.rb in spieker-0.0.7 vs lib/spieker/link_scraper.rb in spieker-0.0.8
- old
+ new
@@ -37,20 +37,27 @@
@links ||= drive_page_for_links
end
# Visits the page for @url with a "#!lang=<@lang>" fragment appended and
# collects the href of every <a> element found on it.
#
# In this diff view, "-" lines are the 0.0.7 code and "+" lines are the
# 0.0.8 code; unmarked lines are shared by both versions.
#
# Returns the collected href values. If a StandardError is raised while
# visiting or reading the page, a message is printed and an empty array is
# returned instead.
def drive_page_for_links
begin
- visit @url.path + "#!lang=#{@lang}"
- links = page.all('a').map { |el| el[:href]}
# 0.0.8: carry the URL's query string (when present) into the visited address
# instead of visiting the bare path.
+ query = if @url.query
+ "?#{@url.query}"
+ else
+ ""
+ end
+ visit @url.path + query + "#!lang=#{@lang}"
+ # Capybara + Selenium sometimes fails to find some links, with no apparent pattern to it.
+ # The nice thing is that collecting them in JS is also a lot faster.
+ links = page.evaluate_script('document.getElementsByTagName(\'a\')').map { |el| el['href'] }
begin
# Our javascript adds a class if the content has been successfully submitted
page.find(:css, 'html.tolq-content-updated')
rescue Capybara::Ambiguous, Capybara::ElementNotFound => e
# A missing or ambiguous marker is only reported; the links gathered above
# are still returned.
- puts "Something went wrong with submitting the content #{e.inspect}"
+ puts "Something went wrong with submitting the content: #{e.message}"
end
links
rescue => e
# Any other failure is reported and the page is treated as having no links.
- puts "Error parsing #{@url.to_s}, #{e.inspect}"
+ puts "Error parsing #{@url.to_s}, #{e.message}"
[]
end
end
def cleaned_up_links(links)