lib/webmaster_tools.rb in webmaster_tools-0.1.0.rc1 vs lib/webmaster_tools.rb in webmaster_tools-0.1.0.rc2
- old
+ new
@@ -11,38 +11,58 @@
# optional:
# :url -
# :security_token -
class WebmasterTools
LOGIN = "https://accounts.google.com/ServiceLogin?service=sitemaps"
+ AUTH = "https://accounts.google.com/ServiceLoginAuth"
REMOVAL = "https://www.google.com/webmasters/tools/removals-request?hl=en&siteUrl=%s&urlt=%s"
INFO = "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s"
DASHBOARD = "https://www.google.com/webmasters/tools/dashboard?hl=en&siteUrl=%s"
ERRORS = "https://www.google.com/webmasters/tools/crawl-errors?hl=en&siteUrl=%s"
STATS = "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=%s"
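+ # GWT-RPC endpoint and module base used to request the sitemaps security token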
+ TOKEN = "https://www.google.com/webmasters/tools/gwt/SITEMAPS_READ"
+ GWT = "https://www.google.com/webmasters/tools/gwt/"
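+ # X-GWT-Permutation header value identifying the compiled GWT client (liable to change when Google redeploys)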
+ GWT_PERM = "E3DA43109D05B1A5067480CE25494CC2"
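+ # serialized GWT-RPC body for SitemapsService#getDataForMainPage; the two %s slots take the module base and the site URL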
+ PAYLOAD = "7|0|11|%s|3EA173CEE6992CFDEAB5C18469B06594|com.google.crawl.wmconsole.fe.feature.gwt.sitemaps.shared.SitemapsService|getDataForMainPage|com.google.crawl.wmconsole.fe.feature.gwt.common.shared.FeatureContext/2156265033|Z|/webmasters/tools|com.google.crawl.wmconsole.fe.feature.gwt.config.FeatureKey/497977451|en|%s|com.google.crawl.wmconsole.fe.base.PermissionLevel/2330262508|1|2|3|4|3|5|6|6|5|7|8|5|9|10|11|5|1|0|"
+
def initialize(username, password)
login(username, password)
end
def login(username, password)
page = agent.get(LOGIN)
page = agent.submit(page.form.tap do |form|
form.Email = username
form.Passwd = password
end)
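+ # the sign-in form posts to AUTH, so its URL reappearing in the response means the login failed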
+ raise "Wrong username + password combination" if page.content.include?(AUTH)
end
def dashboard(url)
- url = norm_url(url)
+ url = CGI::escape norm_url(url)
page = agent.get(DASHBOARD % url)
- {
- :indexed => page.search("#sitemap tbody .rightmost").text.gsub(/\D/, '').to_i
- }
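+ # one hash per row of the sitemaps table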
+ page.search("#sitemap tbody .rightmost").map do |node|
+ { :indexed_web => node.text.gsub(/\D/, '').to_i }
+ end
end
- def crawl_info(url, token)
- url = norm_url(url)
+ def security_token(url)
+ # note: the `crawl_error_counts(url)` page appears to contain the security_token as well (when data is available)
+ dashboard(url) # visit the dashboard first so the token request carries a Referer
+ url = norm_url(url)
+ page = agent.post(TOKEN, PAYLOAD % [GWT, url], {
+ "X-GWT-Module-Base" => GWT,
+ "X-GWT-Permutation" => GWT_PERM,
+ "Content-Type" => "text/x-gwt-rpc; charset=utf-8",
+ })
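+ # the RPC response embeds sitemap download URLs; pull the security_token out of the first one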
+ page.content.scan(/security_token=([^"]+)/).flatten.first
+ end
+
+ def crawl_info(url)
+ token = security_token(url)
+ url = CGI::escape norm_url(url)
page = agent.get(INFO % [url, token])
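# the sitemaps download is CSV; split it into rows and cells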
lines = page.content.split("\n").map do |line|
line.split(",")
end
@@ -52,11 +72,11 @@
Hash[head.zip(line)]
end
end
def crawl_stats(url)
- url = norm_url(url)
+ url = CGI::escape norm_url(url)
types = %w(pages kilobytes milliseconds).map(&:to_sym)
head = %w(high avg low).map(&:to_sym)
page = agent.get(STATS % url)
@@ -66,22 +86,22 @@
Hash[head.zip(slice)]
end)]
end
def crawl_error_counts(url)
- url = norm_url(url)
+ url = CGI::escape norm_url(url)
page = agent.get(ERRORS % url)
page.search(".categories a").inject({}) do |hash, n|
key, value = n.text.split("\n")
hash[key.downcase.gsub(' ', '_').to_sym] = value.gsub(/\D/, '').to_i
hash
end
end
def remove_url(url, file)
- url = norm_url(url)
+ url = CGI::escape norm_url(url)
page = agent.get(REMOVAL % [url, url + file])
page = agent.submit page.form
end
private
@@ -89,8 +109,8 @@
@agent ||= Mechanize.new
end
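# prefix a default scheme and force a single trailing slash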
def norm_url(url)
schema, host = url.scan(/^(https?:\/\/)?(.+?)\/?$/).flatten
- CGI::escape "#{schema || 'http://'}#{host}/"
+ "#{schema || 'http://'}#{host}/"
end
end
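
A minimal usage sketch of the rc2 API, assuming valid Google credentials; the account, site URL, file name, and return values shown in the comments are placeholders:

  require 'webmaster_tools'

  wt = WebmasterTools.new("user@example.com", "secret") # raises if the login is rejected
  wt.dashboard("example.com")           # => [{ :indexed_web => 1234 }, ...] one hash per sitemap row
  wt.crawl_stats("example.com")         # => { :pages => { :high => ..., :avg => ..., :low => ... }, ... }
  wt.crawl_error_counts("example.com")  # => { :not_found => 42, ... }
  wt.crawl_info("example.com")          # fetches its own security token in rc2; returns one hash per CSV row
  wt.remove_url("example.com", "gone.html")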