lib/spieker/link_scraper.rb in spieker-0.0.7 vs lib/spieker/link_scraper.rb in spieker-0.0.8

- old
+ new

@@ -37,20 +37,27 @@ @links ||= drive_page_for_links end def drive_page_for_links begin - visit @url.path + "#!lang=#{@lang}" - links = page.all('a').map { |el| el[:href]} + query = if @url.query + "?#{@url.query}" + else + "" + end + visit @url.path + query + "#!lang=#{@lang}" + # Capybara + selenium causes some links not to be found. There doesn't seem to be any method to that. + # The nice thing, though, is that doing this in JS is a lot faster as well + links = page.evaluate_script('document.getElementsByTagName(\'a\')').map { |el| el['href'] } begin # Our javascript adds a class if the content has been successfully submitted page.find(:css, 'html.tolq-content-updated') rescue Capybara::Ambiguous, Capybara::ElementNotFound => e - puts "Something went wrong with submitting the content #{e.inspect}" + puts "Something went wrong with submitting the content: #{e.message}" end links rescue => e - puts "Error parsing #{@url.to_s}, #{e.inspect}" + puts "Error parsing #{@url.to_s}, #{e.message}" [] end end def cleaned_up_links(links)