require 'capybara'
require 'capybara/selenium/driver'
require 'selenium/webdriver'

module Spieker
  # Visits a page through a Selenium-backed Capybara session and collects the
  # +href+ values of the anchors found on it.
  #
  # NOTE: creating an instance changes global Capybara configuration: it sets
  # +Capybara.app_host+, (re)registers the +:tolq+ driver and makes it the
  # current driver.
  class LinkScraper
    include Capybara::DSL

    attr_writer :links

    # @param url [String] absolute URL of the page to scrape
    # @param lang [String] language code appended to the visited URL as
    #   "#!lang=<lang>"
    def initialize(url, lang: 'en')
      @url = URI.parse(url)
      @lang = lang
      Capybara.app_host = app_host

      Capybara.register_driver :tolq do |app|
        profile = Selenium::WebDriver::Firefox::Profile.new
        profile['general.useragent.override'] = "Mozilla/5.0 (compatible; Tolq Spieker/#{Spieker::VERSION}; +http://www.tolq.com)"

        Capybara::Selenium::Driver.new(app, :profile => profile)
      end

      Capybara.current_driver = :tolq
    end

    # @return [Array<String>] validated, fragment-free, de-duplicated links
    def result
      cleaned_up_links(found_links)
    end

    # Origin (scheme, host and - when it is not the scheme's default - port)
    # that Capybara resolves the visited path against.
    #
    # @return [String] e.g. "http://example.com" or "http://localhost:3000"
    def app_host
      # +host+ (unlike +hostname+) keeps the brackets around IPv6 literals,
      # which are required when the value is used inside a URL.
      origin = "#{@url.scheme}://#{@url.host}"
      port = @url.port
      # Keep an explicit port; dropping it would point the browser at a
      # different server than the one named in the URL.
      return origin if port.nil? || port == @url.default_port

      "#{origin}:#{port}"
    end

    private

    # Memoised list of raw hrefs; can be pre-seeded through +links=+.
    def found_links
      @links ||= drive_page_for_links
    end

    # Loads the page in the browser and returns the raw href of every anchor.
    # Any StandardError is reported on stdout and turned into an empty list so
    # that one broken page does not raise out of #result.
    def drive_page_for_links
      visit @url.path + query_suffix + "#!lang=#{@lang}"
      hrefs = anchor_hrefs
      report_content_submission
      hrefs
    rescue => e
      puts "Error parsing #{@url.to_s}, #{e.message}"
      []
    end

    # "?<query>" when the URL carries a query string, otherwise "".
    def query_suffix
      @url.query ? "?#{@url.query}" : ''
    end

    # Collects the href of every <a> element on the current page.
    def anchor_hrefs
      # Capybara + selenium causes some links not to be found. There doesn't
      # seem to be any method to that. Asking the browser through JavaScript
      # is also a lot faster, so it is tried first.
      page.evaluate_script("document.getElementsByTagName('a')").map { |el| el['href'] }
    rescue Net::ReadTimeout
      # Fall back to Capybara's own finder when the script call times out.
      page.all('a').map { |el| el['href'] }
    end

    # Checks for the marker class on <html> and reports on stdout when it is
    # missing or ambiguous; never raises for those two cases.
    def report_content_submission
      # Our javascript adds a class if the content has been successfully submitted
      page.find(:css, 'html.tolq-content-updated')
    rescue Capybara::Ambiguous, Capybara::ElementNotFound => e
      puts "Something went wrong with submitting the content: #{e.message}"
    end

    # Keeps the links accepted by LinkValidator, strips their fragments and
    # removes duplicates.
    def cleaned_up_links(links)
      links
        # Anchors without an href yield nil; skip them before validation so
        # neither the validator nor #filter_hash has to cope with nil.
        .select { |link| link && LinkValidator.new(link, @url.to_s).valid? }
        .map { |link| filter_hash(link) }
        .compact
        .uniq
    end

    # Returns +link+ without its fragment, i.e. everything before the first
    # "#". A link without "#" is returned unchanged in content.
    def filter_hash(link)
      link[/\A[^#]*/]
    end
  end
end

# Stand-in output stream whose +puts+ discards everything it is given.
# NOTE(review): presumably used in place of a real IO to silence output;
# the call site is not visible in this file - confirm.
class NullStream
  # Accepts any number of arguments, like IO#puts, and ignores them.
  #
  # @return [nil]
  def puts(*); end
end