# Copyright (c) 2012, SoundCloud Ltd., Tobias Bielohlawek

require 'mechanize'

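# WebmasterTools
# Scrapes Google Webmaster Tools pages through a logged-in Mechanize session.
#
# Constructor arguments (positional, both required):
#   username - value submitted in the login form's Email field
#   password - value submitted in the login form's Passwd field
#
# Not constructor arguments:
#   url            - the site URL, passed to each query method individually
#   security_token - fetched on demand by #security_token (used by #crawl_info)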
class WebmasterTools
  # Endpoints; the ones containing %s are format templates filled in with
  # CGI-escaped values by the methods below.
  LOGIN     = "https://accounts.google.com/ServiceLogin?service=sitemaps".freeze
  AUTH      = "https://accounts.google.com/ServiceLoginAuth".freeze
  REMOVAL   = "https://www.google.com/webmasters/tools/removals-request?hl=en&siteUrl=%s&urlt=%s".freeze
  INFO      = "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s".freeze
  DASHBOARD = "https://www.google.com/webmasters/tools/dashboard?hl=en&siteUrl=%s".freeze
  ERRORS    = "https://www.google.com/webmasters/tools/crawl-errors?hl=en&siteUrl=%s".freeze
  STATS     = "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=%s".freeze
  TOKEN     = "https://www.google.com/webmasters/tools/gwt/SITEMAPS_READ".freeze
  GWT       = "https://www.google.com/webmasters/tools/gwt/".freeze
  GWT_PERM  = "E3DA43109D05B1A5067480CE25494CC2".freeze

  # GWT-RPC request body posted to TOKEN; the two %s slots receive the GWT
  # module base and the normalized (unescaped) site URL, in that order.
  PAYLOAD   = "7|0|11|%s|3EA173CEE6992CFDEAB5C18469B06594|com.google.crawl.wmconsole.fe.feature.gwt.sitemaps.shared.SitemapsService|getDataForMainPage|com.google.crawl.wmconsole.fe.feature.gwt.common.shared.FeatureContext/2156265033|Z|/webmasters/tools|com.google.crawl.wmconsole.fe.feature.gwt.config.FeatureKey/497977451|en|%s|com.google.crawl.wmconsole.fe.base.PermissionLevel/2330262508|1|2|3|4|3|5|6|6|5|7|8|5|9|10|11|5|1|0|".freeze

  # Creates a client and logs in right away.
  #
  # username - value for the login form's Email field
  # password - value for the login form's Passwd field
  #
  # Raises RuntimeError when the login is rejected (see #login).
  def initialize(username, password)
    login(username, password)
  end

  # Submits the first form on the LOGIN page with the given credentials.
  #
  # Raises RuntimeError if the resulting page still references the AUTH
  # endpoint, which is taken to mean the login form was shown again.
  def login(username, password)
    login_form = agent.get(LOGIN).form
    login_form.Email  = username
    login_form.Passwd = password

    page = agent.submit(login_form)
    raise "Wrong username + password combination" if page.content.include?(AUTH)
  end

  # Fetches the dashboard page for +url+.
  #
  # Returns an Array with one Hash ({ :indexed_web => Integer }) per node
  # matching "#sitemap tbody .rightmost"; non-digit characters are dropped
  # before the integer conversion.
  def dashboard(url)
    page = agent.get(DASHBOARD % escaped_url(url))
    page.search("#sitemap tbody .rightmost").map do |node|
      { :indexed_web => digits(node.text) }
    end
  end

  # Posts the GWT-RPC payload for +url+ and extracts the security token from
  # the response body.
  #
  # Returns the token String, or nil when the response contains none.
  def security_token(url)
    # looks like `crawl_error_counts(url)` contains the security_token as well (if data available)...
    dashboard(url) # to trigger referer
    page = agent.post(TOKEN, PAYLOAD % [GWT, norm_url(url)], {
      "X-GWT-Module-Base" => GWT,
      "X-GWT-Permutation" => GWT_PERM,
      "Content-Type"      => "text/x-gwt-rpc; charset=utf-8",
    })
    page.content[/security_token=([^"]+)/, 1]
  end

  # Downloads the comma-separated sitemap data for +url+.
  #
  # The first line is used as the header: each column name is downcased,
  # spaces become underscores, and the result is used as a Symbol key.
  #
  # Returns an Array of Hashes (one per data line, values are Strings), or an
  # empty Array when the response body has no lines at all.
  def crawl_info(url)
    token = security_token(url)
    page  = agent.get(INFO % [escaped_url(url), token])

    # Naive split: quoted fields containing commas are not handled.
    rows = page.content.split("\n").map { |line| line.split(",") }
    return [] if rows.empty?

    head = rows.shift.map { |key| symbolize(key) }
    rows.map { |row| Hash[head.zip(row)] }
  end

  # Reads the crawl statistics table for +url+.
  #
  # The cells matching ".hostload-activity tr td" are consumed three at a
  # time as high/avg/low and assigned, in order, to :pages, :kilobytes and
  # :milliseconds.
  #
  # Returns a Hash like { :pages => { :high => 1, :avg => 1, :low => 1 }, ... }.
  def crawl_stats(url)
    types = [:pages, :kilobytes, :milliseconds]
    head  = [:high, :avg, :low]

    page   = agent.get(STATS % escaped_url(url))
    values = page.search(".hostload-activity tr td").map { |node| digits(node.text) }
    groups = values.each_slice(3).map { |slice| Hash[head.zip(slice)] }

    Hash[types.zip(groups)]
  end

  # Reads the crawl error categories for +url+.
  #
  # Each ".categories a" node is expected to hold "Label\nCount"; the label
  # becomes a Symbol key and the count an Integer.
  #
  # Returns a Hash like { :not_found => 12 }.
  def crawl_error_counts(url)
    page = agent.get(ERRORS % escaped_url(url))

    page.search(".categories a").each_with_object({}) do |node, counts|
      key, value = node.text.split("\n")
      counts[symbolize(key)] = digits(value)
    end
  end

  # Opens the removal request page for +file+ on site +url+ and submits the
  # first form found there.
  #
  # Returns whatever the agent's submit returns.
  #
  # NOTE(review): +file+ is appended to the already-escaped site URL without
  # being escaped itself, so characters such as "&" or "#" in +file+ would
  # end up raw in the query string - confirm whether callers rely on this.
  def remove_url(url, file)
    site = escaped_url(url)
    page = agent.get(REMOVAL % [site, site + file])
    agent.submit(page.form)
  end

  private

  # Lazily created Mechanize session shared by all requests of this instance.
  def agent
    @agent ||= Mechanize.new
  end

  # Normalizes +url+ to "<scheme><host-and-path>/": prepends "http://" when
  # no http(s) scheme is given and guarantees exactly one trailing slash.
  def norm_url(url)
    schema, host = url.scan(/^(https?:\/\/)?(.+?)\/?$/).flatten
    "#{schema || 'http://'}#{host}/"
  end

  # Normalized site URL, CGI-escaped for use as a query parameter value.
  def escaped_url(url)
    CGI.escape(norm_url(url))
  end

  # Integer made of the digits in +text+ (0 when there are none).
  def digits(text)
    text.gsub(/\D/, '').to_i
  end

  # Turns a human-readable label ("Not found") into a Symbol (:not_found).
  def symbolize(label)
    label.downcase.tr(' ', '_').to_sym
  end
end