lib/webmaster_tools.rb in webmaster_tools-0.1.0.rc1 vs lib/webmaster_tools.rb in webmaster_tools-0.1.0.rc2
- old
+ new
@@ -11,38 +11,58 @@
# optional:
# :url -
# :security_token -
class WebmasterTools
LOGIN = "https://accounts.google.com/ServiceLogin?service=sitemaps"
+ AUTH = "https://accounts.google.com/ServiceLoginAuth"
REMOVAL = "https://www.google.com/webmasters/tools/removals-request?hl=en&siteUrl=%s&urlt=%s"
INFO = "https://www.google.com/webmasters/tools/sitemaps-dl?hl=en&siteUrl=%s&security_token=%s"
DASHBOARD = "https://www.google.com/webmasters/tools/dashboard?hl=en&siteUrl=%s"
ERRORS = "https://www.google.com/webmasters/tools/crawl-errors?hl=en&siteUrl=%s"
STATS = "https://www.google.com/webmasters/tools/crawl-stats?hl=en&siteUrl=%s"
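+ # GWT-RPC endpoint and module base used to request the sitemaps security token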
+ TOKEN = "https://www.google.com/webmasters/tools/gwt/SITEMAPS_READ"
+ GWT = "https://www.google.com/webmasters/tools/gwt/"
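+ # X-GWT-Permutation header value identifying the compiled GWT client (liable to change when Google redeploys)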
+ GWT_PERM = "E3DA43109D05B1A5067480CE25494CC2"
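+ # serialized GWT-RPC body for SitemapsService#getDataForMainPage; the two %s slots take the module base and the site URL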
+ PAYLOAD = "7|0|11|%s|3EA173CEE6992CFDEAB5C18469B06594|com.google.crawl.wmconsole.fe.feature.gwt.sitemaps.shared.SitemapsService|getDataForMainPage|com.google.crawl.wmconsole.fe.feature.gwt.common.shared.FeatureContext/2156265033|Z|/webmasters/tools|com.google.crawl.wmconsole.fe.feature.gwt.config.FeatureKey/497977451|en|%s|com.google.crawl.wmconsole.fe.base.PermissionLevel/2330262508|1|2|3|4|3|5|6|6|5|7|8|5|9|10|11|5|1|0|"
+
def initialize(username, password)
login(username, password)
end
def login(username, password)
page = agent.get(LOGIN)
page = agent.submit(page.form.tap do |form|
form.Email = username
form.Passwd = password
end)
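+ # the sign-in form posts to AUTH, so its URL reappearing in the response means the login failed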
+ raise "Wrong username + password combination" if page.content.include?(AUTH)
end
def dashboard(url)
- url = norm_url(url)
+ url = CGI::escape norm_url(url)
page = agent.get(DASHBOARD % url)
- {
- :indexed => page.search("#sitemap tbody .rightmost").text.gsub(/\D/, '').to_i
- }
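+ # one hash per row of the sitemaps table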
+ page.search("#sitemap tbody .rightmost").map do |node|
+ { :indexed_web => node.text.gsub(/\D/, '').to_i }
+ end
end
- def crawl_info(url, token)
- url = norm_url(url)
+ def security_token(url)
+ # note: the `crawl_error_counts(url)` page appears to contain the security_token as well (when data is available)
+ dashboard(url) # visit the dashboard first so the token request carries a Referer
+ url = norm_url(url)
+ page = agent.post(TOKEN, PAYLOAD % [GWT, url], {
+ "X-GWT-Module-Base" => GWT,
+ "X-GWT-Permutation" => GWT_PERM,
+ "Content-Type" => "text/x-gwt-rpc; charset=utf-8",
+ })
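+ # the RPC response embeds sitemap download URLs; pull the security_token out of the first one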
+ page.content.scan(/security_token=([^"]+)/).flatten.first
+ end
+
+ def crawl_info(url)
+ token = security_token(url)
+ url = CGI::escape norm_url(url)
page = agent.get(INFO % [url, token])
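# the sitemaps download is CSV; split it into rows and cells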
lines = page.content.split("\n").map do |line|
line.split(",")
end
@@ -52,11 +72,11 @@
Hash[head.zip(line)]
end
end
def crawl_stats(url)
- url = norm_url(url)
+ url = CGI::escape norm_url(url)
types = %w(pages kilobytes milliseconds).map(&:to_sym)
head = %w(high avg low).map(&:to_sym)
page = agent.get(STATS % url)
@@ -66,22 +86,22 @@
Hash[head.zip(slice)]
end)]
end
def crawl_error_counts(url)
- url = norm_url(url)
+ url = CGI::escape norm_url(url)
page = agent.get(ERRORS % url)
page.search(".categories a").inject({}) do |hash, n|
key, value = n.text.split("\n")
hash[key.downcase.gsub(' ', '_').to_sym] = value.gsub(/\D/, '').to_i
hash
end
end
def remove_url(url, file)
- url = norm_url(url)
+ url = CGI::escape norm_url(url)
page = agent.get(REMOVAL % [url, url + file])
page = agent.submit page.form
end
private
@@ -89,8 +109,8 @@
@agent ||= Mechanize.new
end
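# prefix a default scheme and force a single trailing slash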
def norm_url(url)
schema, host = url.scan(/^(https?:\/\/)?(.+?)\/?$/).flatten
- CGI::escape "#{schema || 'http://'}#{host}/"
+ "#{schema || 'http://'}#{host}/"
end
end
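
A minimal usage sketch of the rc2 API, assuming valid Google credentials; the account, site URL, file name, and return values shown in the comments are placeholders:

  require 'webmaster_tools'

  wt = WebmasterTools.new("user@example.com", "secret") # raises if the login is rejected
  wt.dashboard("example.com")           # => [{ :indexed_web => 1234 }, ...] one hash per sitemap row
  wt.crawl_stats("example.com")         # => { :pages => { :high => ..., :avg => ..., :low => ... }, ... }
  wt.crawl_error_counts("example.com")  # => { :not_found => 42, ... }
  wt.crawl_info("example.com")          # fetches its own security token in rc2; returns one hash per CSV row
  wt.remove_url("example.com", "gone.html")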