lib/media_wiki/gateway.rb in mediawiki-gateway-0.4.5 vs lib/media_wiki/gateway.rb in mediawiki-gateway-0.5.0

- old
+ new

@@ -4,46 +4,48 @@ require 'rexml/document' require 'uri' require 'active_support' module MediaWiki - + class Gateway attr_reader :log - + # Set up a MediaWiki::Gateway for a given MediaWiki installation # # [url] Path to API of target MediaWiki (eg. "http://en.wikipedia.org/w/api.php") # [options] Hash of options # # Options: # [:bot] When set to true, executes API queries with the bot parameter (see http://www.mediawiki.org/wiki/API:Edit#Parameters). Defaults to false. # [:ignorewarnings] Log API warnings and invalid page titles, instead throwing MediaWiki::APIError # [:limit] Maximum number of results returned per search (see http://www.mediawiki.org/wiki/API:Query_-_Lists#Limits), defaults to the MediaWiki default of 500. + # [:logdevice] Log device to use. Defaults to STDERR # [:loglevel] Log level to use, defaults to Logger::WARN. Set to Logger::DEBUG to dump every request and response to the log. # [:maxlag] Maximum allowed server lag (see http://www.mediawiki.org/wiki/Manual:Maxlag_parameter), defaults to 5 seconds. # [:retry_count] Number of times to try before giving up if MediaWiki returns 503 Service Unavailable, defaults to 3 (original request plus two retries). # [:retry_delay] Seconds to wait before retry if MediaWiki returns 503 Service Unavailable, defaults to 10 seconds. def initialize(url, options={}) default_options = { + :bot => false, :limit => 500, + :logdevice => STDERR, :loglevel => Logger::WARN, :maxlag => 5, :retry_count => 3, :retry_delay => 10, - :bot => false } @options = default_options.merge(options) @wiki_url = url - @log = Logger.new(STDERR) + @log = Logger.new(@options[:logdevice]) @log.level = @options[:loglevel] @headers = { "User-Agent" => "MediaWiki::Gateway/#{MediaWiki::VERSION}" } @cookies = {} end - + attr_reader :base_url, :cookies - + # Login to MediaWiki # # [username] Username # [password] Password # [domain] Domain for authentication plugin logins (eg. LDAP), optional -- defaults to 'local' if not given @@ -53,11 +55,11 @@ form_data = {'action' => 'login', 'lgname' => username, 'lgpassword' => password, 'lgdomain' => domain} make_api_request(form_data) @password = password @username = username end - + # Fetch MediaWiki page in MediaWiki format. Does not follow redirects. # # [page_title] Page title to fetch # # Returns content of page as string, nil if the page does not exist. @@ -89,11 +91,11 @@ # # Options: # * [:linkbase] supply a String to prefix all internal (relative) links with. '/wiki/' is assumed to be the base of a relative link # * [:noeditsections] strips all edit-links if set to +true+ # * [:noimages] strips all +img+ tags from the rendered text if set to +true+ - # + # # Returns rendered page as string, or nil if the page does not exist def render(page_title, options = {}) form_data = {'action' => 'parse', 'page' => page_title} valid_options = %w(linkbase noeditsections noimages) @@ -116,11 +118,11 @@ rendered = rendered.gsub(/<img.*\/>/, '') end end rendered end - + # Create a new page, or overwrite an existing one # # [title] Page title to create or overwrite, string # [content] Content for the page, string # [options] Hash of additional options @@ -218,15 +220,15 @@ # * [:watch] Add page and any redirect to watchlist # * [:unwatch] Remove page and any redirect from watchlist def move(from, to, options={}) valid_options = %w(movesubpages movetalk noredirect reason watch unwatch) options.keys.each{|opt| raise ArgumentError.new("Unknown option '#{opt}'") unless valid_options.include?(opt.to_s)} - + form_data = options.merge({'action' => 'move', 'from' => from, 'to' => to, 'token' => get_token('move', from)}) make_api_request(form_data) end - + # Delete one page. (MediaWiki API does not support deleting multiple pages at a time.) # # [title] Title of page to delete def delete(title) form_data = {'action' => 'delete', 'title' => title, 'token' => get_token('delete', title)} @@ -271,10 +273,32 @@ titles += REXML::XPath.match(res, "//p").map { |x| x.attributes["title"] } end while apfrom titles end + # Get a list of pages that are members of a category + # + # [category] Name of the category + # [options] Optional hash of additional options. See http://www.mediawiki.org/wiki/API:Categorymembers + # + # Returns array of page titles (empty if no matches) + def category_members(category, options = {}) + titles = [] + apfrom = nil + begin + form_data = options.merge( + {'action' => 'query', + 'list' => 'categorymembers', + 'apfrom' => apfrom, + 'cmtitle' => category, + 'cmlimit' => @options[:limit]}) + res, apfrom = make_api_request(form_data, '//query-continue/categorymembers/@apfrom') + titles += REXML::XPath.match(res, "//cm").map { |x| x.attributes["title"] } + end while apfrom + titles + end + # Get a list of pages that link to a target page # # [title] Link target page # [filter] "all" links (default), "redirects" only, or "nonredirects" (plain links only) # @@ -324,24 +348,24 @@ titles += REXML::XPath.match(res, "//p").map { |x| x.attributes["title"] } end while offset titles end - # Upload a file, or get the status of pending uploads. Several + # Upload a file, or get the status of pending uploads. Several # methods are available: # # * Upload file contents directly. - # * Have the MediaWiki server fetch a file from a URL, using the + # * Have the MediaWiki server fetch a file from a URL, using the # "url" parameter # # Requires Mediawiki 1.16+ # # Arguments: # * [path] Path to file to upload. Set to nil if uploading from URL. # * [options] Hash of additional options - # - # Note that queries using session keys must be done in the same login + # + # Note that queries using session keys must be done in the same login # session as the query that originally returned the key (i.e. do not # log out and then log back in). # # Options: # * 'filename' - Target filename (defaults to local name if not given), options[:target] is alias for this. @@ -353,11 +377,11 @@ # # Deprecated but still supported options: # * :description - Description of this file. Used as 'text'. # * :target - Target filename, same as 'filename'. # * :summary - Edit summary for history. Used as 'comment'. Also used as 'text' if neither it or :description is specified. - # + # # Examples: # mw.upload('/path/to/local/file.jpg', 'filename' => "RemoteFile.jpg") # mw.upload(nil, 'filename' => "RemoteFile2.jpg", 'url' => 'http://remote.com/server/file.jpg') # def upload(path, options={}) @@ -402,11 +426,11 @@ def redirect?(page_title) form_data = {'action' => 'query', 'prop' => 'info', 'titles' => page_title} page = make_api_request(form_data).first.elements["query/pages/page"] !!(valid_page?(page) and page.attributes["redirect"]) end - + # Requests image info from MediaWiki. Follows redirects. # # _file_name_or_page_id_ should be either: # * a file name (String) you want info about without File: prefix. # * or a Fixnum page id you of the file. @@ -459,16 +483,16 @@ else nil end end - # Download _file_name_ (without "File:" or "Image:" prefix). Returns file contents. All options are passed to + # Download _file_name_ (without "File:" or "Image:" prefix). Returns file contents. All options are passed to # #image_info however options['iiprop'] is forced to url. You can still # set other options to control what file you want to download. def download(file_name, options={}) options['iiprop'] = 'url' - + attributes = image_info(file_name, options) if attributes RestClient.get attributes['url'] else nil @@ -477,11 +501,11 @@ # Imports a MediaWiki XML dump # # [xml] String or array of page names to fetch # - # Returns XML array <api><import><page/><page/>... + # Returns XML array <api><import><page/><page/>... # <page revisions="1"> (or more) means successfully imported # <page revisions="0"> means duplicate, not imported def import(xmlfile) form_data = { "action" => "import", "xml" => File.new(xmlfile), @@ -523,11 +547,11 @@ name = extension.attributes["name"] || "" extensions[name] = extension.attributes["version"] extensions end end - + # Execute Semantic Mediawiki query # # [query] Semantic Mediawiki query # [params] Array of additional parameters or options, eg. mainlabel=Foo or ?Place (optional) # @@ -536,11 +560,11 @@ params << "format=list" form_data = { 'action' => 'parse', 'prop' => 'text', 'text' => "{{#ask:#{query}|#{params.join('|')}}}" } xml, dummy = make_api_request(form_data) return xml.elements["parse/text"].text end - + # Set groups for a user # # [user] Username of user to modify # [groups_to_add] Groups to add user to, as an array or a string if a single group (optional) # [groups_to_remove] Groups to remove user from, as an array or a string if a single group (optional) @@ -597,14 +621,14 @@ raise APIError.new('invaliduser', "User '#{user}' was not found (get_userrights_token)") else raise Unauthorized.new "User '#{@username}' is not permitted to perform this operation: get_userrights_token" end end - + token end - + def userrights(user, token, groups_to_add, groups_to_remove, reason) # groups_to_add and groups_to_remove can be a string or an array. Turn them into MediaWiki's pipe-delimited list format. if groups_to_add.is_a? Array groups_to_add = groups_to_add.join('|') end @@ -618,11 +642,11 @@ 'reason' => reason } res, dummy = make_api_request(form_data) res end - + # Make generic request to API # # [form_data] hash or string of attributes to post # [continue_xpath] XPath selector for query continue parameter # [retry_count] Counter for retries @@ -632,19 +656,18 @@ if form_data.kind_of? Hash form_data['format'] = 'xml' form_data['maxlag'] = @options[:maxlag] form_data['bot']="1" if @options[:bot] end - log.debug("REQ: #{form_data.inspect}, #{@cookies.inspect}") - RestClient.post(@wiki_url, form_data, @headers.merge({:cookies => @cookies})) do |response, &block| + http_send(@wiki_url, form_data, @headers.merge({:cookies => @cookies})) do |response, &block| if response.code == 503 and retry_count < @options[:retry_count] log.warn("503 Service Unavailable: #{response.body}. Retry in #{@options[:retry_delay]} seconds.") sleep @options[:retry_delay] make_api_request(form_data, continue_xpath, retry_count + 1) end # Check response for errors and return XML - raise MediaWiki::Exception.new "Bad response: #{response}" unless response.code >= 200 and response.code < 300 + raise MediaWiki::Exception.new "Bad response: #{response}" unless response.code >= 200 and response.code < 300 doc = get_response(response.dup) if(form_data['action'] == 'login') login_result = doc.elements["login"].attributes['result'] @cookies.merge!(response.cookies) case login_result @@ -655,11 +678,23 @@ end continue = (continue_xpath and doc.elements['query-continue']) ? REXML::XPath.first(doc, continue_xpath).value : nil return [doc, continue] end end - + + # Execute the HTTP request using either GET or POST as appropriate + def http_send url, form_data, headers, &block + if form_data['action'] == 'query' + log.debug("GET: #{form_data.inspect}, #{@cookies.inspect}") + headers[:params] = form_data + RestClient.get url, headers, &block + else + log.debug("POST: #{form_data.inspect}, #{@cookies.inspect}") + RestClient.post url, form_data, headers, &block + end + end + # Get API XML response # If there are errors or warnings, raise APIError # Otherwise return XML root def get_response(res) begin @@ -678,20 +713,20 @@ if doc.elements["warnings"] warning("API warning: #{doc.elements["warnings"].children.map {|e| e.text}.join(", ")}") end doc end - + def valid_page?(page) return false unless page return false if page.attributes["missing"] if page.attributes["invalid"] warning("Invalid title '#{page.attributes["title"]}'") else true end end - + def warning(msg) if @options[:ignorewarnings] log.warn(msg) return false else