lib/media_wiki/gateway.rb in mediawiki-gateway-0.4.5 vs lib/media_wiki/gateway.rb in mediawiki-gateway-0.5.0
- old
+ new
@@ -4,46 +4,48 @@
require 'rexml/document'
require 'uri'
require 'active_support'
module MediaWiki
-
+
class Gateway
attr_reader :log
-
+
# Set up a MediaWiki::Gateway for a given MediaWiki installation
#
# [url] Path to API of target MediaWiki (eg. "http://en.wikipedia.org/w/api.php")
# [options] Hash of options
#
# Options:
# [:bot] When set to true, executes API queries with the bot parameter (see http://www.mediawiki.org/wiki/API:Edit#Parameters). Defaults to false.
# [:ignorewarnings] Log API warnings and invalid page titles, instead of throwing MediaWiki::APIError
# [:limit] Maximum number of results returned per search (see http://www.mediawiki.org/wiki/API:Query_-_Lists#Limits), defaults to the MediaWiki default of 500.
+ # [:logdevice] Log device to use. Defaults to STDERR
# [:loglevel] Log level to use, defaults to Logger::WARN. Set to Logger::DEBUG to dump every request and response to the log.
# [:maxlag] Maximum allowed server lag (see http://www.mediawiki.org/wiki/Manual:Maxlag_parameter), defaults to 5 seconds.
# [:retry_count] Number of times to try before giving up if MediaWiki returns 503 Service Unavailable, defaults to 3 (original request plus two retries).
# [:retry_delay] Seconds to wait before retry if MediaWiki returns 503 Service Unavailable, defaults to 10 seconds.
def initialize(url, options={})
# Baseline settings; any key present in +options+ replaces the value given here.
default_options = {
+ :bot => false,
:limit => 500,
+ :logdevice => STDERR,
:loglevel => Logger::WARN,
:maxlag => 5,
:retry_count => 3,
:retry_delay => 10,
- :bot => false
}
@options = default_options.merge(options)
@wiki_url = url
# Logger setup: the removed line always logged to STDERR, the added line
# logs to whatever the :logdevice option holds.
- @log = Logger.new(STDERR)
+ @log = Logger.new(@options[:logdevice])
@log.level = @options[:loglevel]
# Default HTTP headers; the User-Agent carries the gateway version.
@headers = { "User-Agent" => "MediaWiki::Gateway/#{MediaWiki::VERSION}" }
# Cookie store, empty at construction time.
@cookies = {}
end
-
+
attr_reader :base_url, :cookies
-
+
# Login to MediaWiki
#
# [username] Username
# [password] Password
# [domain] Domain for authentication plugin logins (eg. LDAP), optional -- defaults to 'local' if not given
@@ -53,11 +55,11 @@
form_data = {'action' => 'login', 'lgname' => username, 'lgpassword' => password, 'lgdomain' => domain}
make_api_request(form_data)
@password = password
@username = username
end
-
+
# Fetch MediaWiki page in MediaWiki format. Does not follow redirects.
#
# [page_title] Page title to fetch
#
# Returns content of page as string, nil if the page does not exist.
@@ -89,11 +91,11 @@
#
# Options:
# * [:linkbase] supply a String to prefix all internal (relative) links with. '/wiki/' is assumed to be the base of a relative link
# * [:noeditsections] strips all edit-links if set to +true+
# * [:noimages] strips all +img+ tags from the rendered text if set to +true+
- #
+ #
# Returns rendered page as string, or nil if the page does not exist
def render(page_title, options = {})
form_data = {'action' => 'parse', 'page' => page_title}
valid_options = %w(linkbase noeditsections noimages)
@@ -116,11 +118,11 @@
rendered = rendered.gsub(/<img.*\/>/, '')
end
end
rendered
end
-
+
# Create a new page, or overwrite an existing one
#
# [title] Page title to create or overwrite, string
# [content] Content for the page, string
# [options] Hash of additional options
@@ -218,15 +220,15 @@
# * [:watch] Add page and any redirect to watchlist
# * [:unwatch] Remove page and any redirect from watchlist
def move(from, to, options={})
# Reject any option key (string or symbol) that is not in the whitelist below.
valid_options = %w(movesubpages movetalk noredirect reason watch unwatch)
options.keys.each{|opt| raise ArgumentError.new("Unknown option '#{opt}'") unless valid_options.include?(opt.to_s)}
-
+
# The fixed request fields are merged last, so they win over same-named
# string keys in +options+. The token is obtained via get_token('move', from).
form_data = options.merge({'action' => 'move', 'from' => from, 'to' => to, 'token' => get_token('move', from)})
make_api_request(form_data)
end
-
+
# Delete one page. (MediaWiki API does not support deleting multiple pages at a time.)
#
# [title] Title of page to delete
def delete(title)
form_data = {'action' => 'delete', 'title' => title, 'token' => get_token('delete', title)}
@@ -271,10 +273,32 @@
titles += REXML::XPath.match(res, "//p").map { |x| x.attributes["title"] }
end while apfrom
titles
end
+ # Get a list of pages that are members of a category
+ #
+ # [category] Name of the category
+ # [options] Optional hash of additional options. See http://www.mediawiki.org/wiki/API:Categorymembers
+ #
+ # Returns array of page titles (empty if no matches)
+ def category_members(category, options = {})
+ titles = []
+ apfrom = nil
+ begin
+ form_data = options.merge(
+ {'action' => 'query',
+ 'list' => 'categorymembers',
+ 'apfrom' => apfrom,
+ 'cmtitle' => category,
+ 'cmlimit' => @options[:limit]})
+ res, apfrom = make_api_request(form_data, '//query-continue/categorymembers/@apfrom')
+ titles += REXML::XPath.match(res, "//cm").map { |x| x.attributes["title"] }
+ end while apfrom
+ titles
+ end
+
# Get a list of pages that link to a target page
#
# [title] Link target page
# [filter] "all" links (default), "redirects" only, or "nonredirects" (plain links only)
#
@@ -324,24 +348,24 @@
titles += REXML::XPath.match(res, "//p").map { |x| x.attributes["title"] }
end while offset
titles
end
- # Upload a file, or get the status of pending uploads. Several
+ # Upload a file, or get the status of pending uploads. Several
# methods are available:
#
# * Upload file contents directly.
- # * Have the MediaWiki server fetch a file from a URL, using the
+ # * Have the MediaWiki server fetch a file from a URL, using the
# "url" parameter
#
# Requires Mediawiki 1.16+
#
# Arguments:
# * [path] Path to file to upload. Set to nil if uploading from URL.
# * [options] Hash of additional options
- #
- # Note that queries using session keys must be done in the same login
+ #
+ # Note that queries using session keys must be done in the same login
# session as the query that originally returned the key (i.e. do not
# log out and then log back in).
#
# Options:
# * 'filename' - Target filename (defaults to local name if not given), options[:target] is alias for this.
@@ -353,11 +377,11 @@
#
# Deprecated but still supported options:
# * :description - Description of this file. Used as 'text'.
# * :target - Target filename, same as 'filename'.
# * :summary - Edit summary for history. Used as 'comment'. Also used as 'text' if neither it nor :description is specified.
- #
+ #
# Examples:
# mw.upload('/path/to/local/file.jpg', 'filename' => "RemoteFile.jpg")
# mw.upload(nil, 'filename' => "RemoteFile2.jpg", 'url' => 'http://remote.com/server/file.jpg')
#
def upload(path, options={})
@@ -402,11 +426,11 @@
def redirect?(page_title)
  # Request the basic info record for the title.
  query = {
    'action' => 'query',
    'prop'   => 'info',
    'titles' => page_title
  }
  info = make_api_request(query).first.elements["query/pages/page"]

  # Anything valid_page? rejects is reported as "not a redirect".
  return false unless valid_page?(info)

  # Coerce the presence of the "redirect" attribute to a strict boolean.
  info.attributes["redirect"] ? true : false
end
-
+
# Requests image info from MediaWiki. Follows redirects.
#
# _file_name_or_page_id_ should be either:
# * a file name (String) you want info about without File: prefix.
# * or a Fixnum page id of the file.
@@ -459,16 +483,16 @@
else
nil
end
end
- # Download _file_name_ (without "File:" or "Image:" prefix). Returns file contents. All options are passed to
+ # Download _file_name_ (without "File:" or "Image:" prefix). Returns file contents. All options are passed to
# #image_info however options['iiprop'] is forced to url. You can still
# set other options to control what file you want to download.
def download(file_name, options={})
options['iiprop'] = 'url'
-
+
attributes = image_info(file_name, options)
if attributes
RestClient.get attributes['url']
else
nil
@@ -477,11 +501,11 @@
# Imports a MediaWiki XML dump
#
# [xmlfile] Path to the MediaWiki XML dump file to import
#
- # Returns XML array <api><import><page/><page/>...
+ # Returns XML array <api><import><page/><page/>...
# <page revisions="1"> (or more) means successfully imported
# <page revisions="0"> means duplicate, not imported
def import(xmlfile)
form_data = { "action" => "import",
"xml" => File.new(xmlfile),
@@ -523,11 +547,11 @@
name = extension.attributes["name"] || ""
extensions[name] = extension.attributes["version"]
extensions
end
end
-
+
# Execute Semantic Mediawiki query
#
# [query] Semantic Mediawiki query
# [params] Array of additional parameters or options, eg. mainlabel=Foo or ?Place (optional)
#
@@ -536,11 +560,11 @@
params << "format=list"
form_data = { 'action' => 'parse', 'prop' => 'text', 'text' => "{{#ask:#{query}|#{params.join('|')}}}" }
xml, dummy = make_api_request(form_data)
return xml.elements["parse/text"].text
end
-
+
# Set groups for a user
#
# [user] Username of user to modify
# [groups_to_add] Groups to add user to, as an array or a string if a single group (optional)
# [groups_to_remove] Groups to remove user from, as an array or a string if a single group (optional)
@@ -597,14 +621,14 @@
raise APIError.new('invaliduser', "User '#{user}' was not found (get_userrights_token)")
else
raise Unauthorized.new "User '#{@username}' is not permitted to perform this operation: get_userrights_token"
end
end
-
+
token
end
-
+
def userrights(user, token, groups_to_add, groups_to_remove, reason)
# groups_to_add and groups_to_remove can be a string or an array. Turn them into MediaWiki's pipe-delimited list format.
if groups_to_add.is_a? Array
groups_to_add = groups_to_add.join('|')
end
@@ -618,11 +642,11 @@
'reason' => reason
}
res, dummy = make_api_request(form_data)
res
end
-
+
# Make generic request to API
#
# [form_data] hash or string of attributes to post
# [continue_xpath] XPath selector for query continue parameter
# [retry_count] Counter for retries
@@ -632,19 +656,18 @@
if form_data.kind_of? Hash
form_data['format'] = 'xml'
form_data['maxlag'] = @options[:maxlag]
form_data['bot']="1" if @options[:bot]
end
- log.debug("REQ: #{form_data.inspect}, #{@cookies.inspect}")
- RestClient.post(@wiki_url, form_data, @headers.merge({:cookies => @cookies})) do |response, &block|
+ http_send(@wiki_url, form_data, @headers.merge({:cookies => @cookies})) do |response, &block|
if response.code == 503 and retry_count < @options[:retry_count]
log.warn("503 Service Unavailable: #{response.body}. Retry in #{@options[:retry_delay]} seconds.")
sleep @options[:retry_delay]
make_api_request(form_data, continue_xpath, retry_count + 1)
end
# Check response for errors and return XML
- raise MediaWiki::Exception.new "Bad response: #{response}" unless response.code >= 200 and response.code < 300
+ raise MediaWiki::Exception.new "Bad response: #{response}" unless response.code >= 200 and response.code < 300
doc = get_response(response.dup)
if(form_data['action'] == 'login')
login_result = doc.elements["login"].attributes['result']
@cookies.merge!(response.cookies)
case login_result
@@ -655,11 +678,23 @@
end
continue = (continue_xpath and doc.elements['query-continue']) ? REXML::XPath.first(doc, continue_xpath).value : nil
return [doc, continue]
end
end
-
+
+ # Execute the HTTP request using either GET or POST as appropriate
+ def http_send url, form_data, headers, &block
+ if form_data['action'] == 'query'
+ log.debug("GET: #{form_data.inspect}, #{@cookies.inspect}")
+ headers[:params] = form_data
+ RestClient.get url, headers, &block
+ else
+ log.debug("POST: #{form_data.inspect}, #{@cookies.inspect}")
+ RestClient.post url, form_data, headers, &block
+ end
+ end
+
# Get API XML response
# If there are errors or warnings, raise APIError
# Otherwise return XML root
def get_response(res)
begin
@@ -678,20 +713,20 @@
if doc.elements["warnings"]
warning("API warning: #{doc.elements["warnings"].children.map {|e| e.text}.join(", ")}")
end
doc
end
-
+
def valid_page?(page)
  # An absent page element, or one flagged "missing", is never valid.
  return false if !page || page.attributes["missing"]

  attrs = page.attributes
  # Without an "invalid" flag the page is good.
  return true unless attrs["invalid"]

  # Invalid title: hand over to warning and pass its result back.
  warning("Invalid title '#{attrs["title"]}'")
end
-
+
def warning(msg)
if @options[:ignorewarnings]
log.warn(msg)
return false
else