lib/rets4r/client.rb in rets4r-0.8.5 vs lib/rets4r/client.rb in rets4r-1.1.18

- old
+ new

@@ -5,663 +5,483 @@ # This program is copyrighted free software by Scott Patterson. You can # redistribute it and/or modify it under the same terms of Ruby's license; # either the dual license version in 2003 (see the file RUBYS), or any later # version. # -# TODO -# Case-insensitive header +# TODO: 1.0 Support (Adding this support should be fairly easy) +# TODO: 2.0 Support (Adding this support will be very difficult since it is a completely different methodology) +# TODO: Case-insensitive header require 'digest/md5' require 'net/http' require 'uri' require 'cgi' -require 'rets4r/auth' -require 'rets4r/client/dataobject' -require 'thread' +require 'auth' +require 'client/dataobject' +require 'client/parsers/response_parser' +require 'client/parsers/compact' +require 'rets4r/client/links' +require 'rets4r/client/requester' +require 'rets4r/client/exceptions' require 'logger' +require 'webrick/httputils' module RETS4R - class Client - OUTPUT_RAW = 0 # Nothing done. Simply returns the XML. - OUTPUT_DOM = 1 # Returns a DOM object (REXML) **** NO LONGER SUPPORTED! **** - OUTPUT_RUBY = 2 # Returns a RETS::Data object - - METHOD_GET = 'GET' - METHOD_POST = 'POST' - METHOD_HEAD = 'HEAD' - - DEFAULT_OUTPUT = OUTPUT_RUBY - DEFAULT_METHOD = METHOD_POST - DEFAULT_RETRY = 2 - DEFAULT_USER_AGENT = 'RETS4R/0.8.5' - DEFAULT_RETS_VERSION = '1.7' - SUPPORTED_RETS_VERSIONS = ['1.5', '1.7'] - CAPABILITY_LIST = ['Action', 'ChangePassword', 'GetObject', 'Login', 'LoginComplete', 'Logout', 'Search', 'GetMetadata', 'Update'] - SUPPORTED_PARSERS = [] # This will be populated by parsers as they load - - # These are the response messages as defined in the RETS 1.5e2 and 1.7d6 specifications. - # Provided for convenience and are used by the HTTPError class to provide more useful - # messages. - RETS_HTTP_MESSAGES = { - '200' => 'Operation successful.', - '400' => 'The request could not be understood by the server due to malformed syntax.', - '401' => 'Either the header did not contain an acceptable Authorization or the username/password was invalid. The server response MUST include a WWW-Authenticate header field.', - '402' => 'The requested transaction requires a payment which could not be authorized.', - '403' => 'The server understood the request, but is refusing to fulfill it.', - '404' => 'The server has not found anything matching the Request-URI.', - '405' => 'The method specified in the Request-Line is not allowed for the resource identified by the Request-URI.', - '406' => 'The resource identified by the request is only capable of generating response entities which have content characteristics not acceptable according to the accept headers sent in the request.', - '408' => 'The client did not produce a request within the time that the server was prepared to wait.', - '411' => 'The server refuses to accept the request without a defined Content-Length.', - '412' => 'Transaction not permitted at this point in the session.', - '413' => 'The server is refusing to process a request because the request entity is larger than the server is willing or able to process.', - '414' => 'The server is refusing to service the request because the Request-URI is longer than the server is willing to interpret. This error usually only occurs for a GET method.', - '500' => 'The server encountered an unexpected condition which prevented it from fulfilling the request.', - '501' => 'The server does not support the functionality required to fulfill the request.', - '503' => 'The server is currently unable to handle the request due to a temporary overloading or maintenance of the server.', - '505' => 'The server does not support, or refuses to support, the HTTP protocol version that was used in the request message.', - } - - attr_accessor :mimemap, :logger - - # We load our parsers here so that they can modify the client class appropriately. Because - # the default parser will be the first parser to list itself in the DEFAULT_PARSER array, - # we need to require them in the order of preference. Hence, XMLParser is loaded first because - # it is preferred to REXML since it is much faster. - require 'rets4r/client/parser/xmlparser' - require 'rets4r/client/parser/rexml' - - # Set it as the first - DEFAULT_PARSER = SUPPORTED_PARSERS[0] - - # Constructor - # - # Requires the URL to the RETS server and takes an optional output format. The output format - # determines the type of data returned by the various RETS transaction methods. - def initialize(url, output = DEFAULT_OUTPUT) - raise Unsupported.new('DOM output is no longer supported.') if output == OUTPUT_DOM - - @urls = { 'Login' => URI.parse(url) } - @nc = 0 - @headers = { - 'User-Agent' => DEFAULT_USER_AGENT, - 'Accept' => '*/*', - 'RETS-Version' => "RETS/#{DEFAULT_RETS_VERSION}", - 'RETS-Session-ID' => '0' - } - @request_method = DEFAULT_METHOD - @parser_class = DEFAULT_PARSER - @semaphore = Mutex.new - @output = output - - self.mimemap = { - 'image/jpeg' => 'jpg', - 'image/gif' => 'gif' - } - - if block_given? - yield self - end - end - - # Assigns a block that will be called just before the request is sent. - # This block must accept three parameters: - # * self - # * Net::HTTP instance - # * Hash of headers - # - # The block's return value will be ignored. If you want to prevent the request - # to go through, raise an exception. - # - # == Example - # - # client = RETS4R::Client.new(...) - # # Make a new pre_request_block that calculates the RETS-UA-Authorization header. - # client.set_pre_request_block do |rets, http, headers| - # a1 = Digest::MD5.hexdigest([headers["User-Agent"], @password].join(":")) - # if headers.has_key?("Cookie") then - # cookie = headers["Cookie"].split(";").map(&:strip).select {|c| c =~ /rets-session-id/i} - # cookie = cookie ? cookie.split("=").last : "" - # else - # cookie = "" - # end - # - # parts = [a1, "", cookie, headers["RETS-Version"]] - # headers["RETS-UA-Authorization"] = "Digest " + Digest::MD5.hexdigest(parts.join(":")) - # end - def set_pre_request_block(&block) - @pre_request_block = block - end - - # We only allow external read access to URLs because they are internally set based on the - # results of various queries. - def urls - @urls - end - - # Parses the provided XML returns it in the specified output format. - # Requires an XML string and takes an optional output format to override the instance output - # format variable. We current create a new parser each time, which seems a bit wasteful, but - # it allows for the parser to be changed in the middle of a session as well as XML::Parser - # requiring a new instance for each execution...that could be encapsulated within its parser - # class,though, so we should benchmark and see if it will make a big difference with the - # REXML parse, which I doubt. - def parse(xml, output = false) - if xml == '' - trans = Transaction.new() - trans.reply_code = -1 - trans.reply_text = 'No transaction body was returned!' - end - - if output == OUTPUT_RAW || @output == OUTPUT_RAW - xml - else - begin - parser = @parser_class.new - parser.logger = logger - parser.output = output ? output : @output + class Client + COMPACT_FORMAT = 'COMPACT' - parser.parse(xml) - rescue - raise ParserException.new($!) - end - end - end - - # Setup Methods (accessors and mutators) - def set_output(output = DEFAULT_OUTPUT) - @output = output - end - - def get_output - @output - end - - def set_parser_class(klass, force = false) - if force || SUPPORTED_PARSERS.include?(klass) - @parser_class = klass - else - message = "The parser class '#{klass}' is not supported!" - debug(message) - - raise Unsupported.new(message) - end - end - - def get_parser_class - @parser_class - end - - def set_header(name, value) - if value.nil? then - @headers.delete(name) - else - @headers[name] = value - end - - debug("Set header '#{name}' to '#{value}'") - end - - def get_header(name) - @headers[name] - end - - def set_user_agent(name) - set_header('User-Agent', name) - end - - def get_user_agent - get_header('User-Agent') - end - - def set_rets_version(version) - if (SUPPORTED_RETS_VERSIONS.include? version) - set_header('RETS-Version', "RETS/#{version}") - else - raise Unsupported.new("The client does not support RETS version '#{version}'.") - end - end - - def get_rets_version - (get_header('RETS-Version') || "").gsub("RETS/", "") - end - - def set_request_method(method) - @request_method = method - end - - def get_request_method - @request_method - end - - # Provide more Ruby-like attribute accessors instead of get/set methods - alias_method :user_agent=, :set_user_agent - alias_method :user_agent, :get_user_agent - alias_method :request_method=, :set_request_method - alias_method :request_method, :get_request_method - alias_method :rets_version=, :set_rets_version - alias_method :rets_version, :get_rets_version - alias_method :parser_class=, :set_parser_class - alias_method :parser_class, :get_parser_class - alias_method :output=, :set_output - alias_method :output, :get_output - - #### RETS Transaction Methods #### - # - # Most of these transaction methods mirror the RETS specification methods, so if you are - # unsure what they mean, you should check the RETS specification. The latest version can be - # found at http://www.rets.org - - # Attempts to log into the server using the provided username and password. - # - # If called with a block, the results of the login action are yielded, - # and logout is called when the block returns. In that case, #login - # returns the block's value. If called without a block, returns the - # result. - # - # As specified in the RETS specification, the Action URL is called and - # the results made available in the #secondary_results accessor of the - # results object. - def login(username, password) #:yields: login_results - @username = username - @password = password - - # We are required to set the Accept header to this by the RETS 1.5 specification. - set_header('Accept', '*/*') - - response = request(@urls['Login']) - - # Parse response to get other URLS - results = self.parse(response.body, OUTPUT_RUBY) - - if (results.success?) - CAPABILITY_LIST.each do |capability| - next unless results.response[capability] - base = @urls['Login'].clone - base.path = results.response[capability] - - @urls[capability] = base - end - - debug("Capability URL List: #{@urls.inspect}") - else - raise LoginError.new(response.message + "(#{results.reply_code}: #{results.reply_text})") - end - - if @output != OUTPUT_RUBY - results = self.parse(response.body) - end - - # Perform the mandatory get request on the action URL. - results.secondary_response = perform_action_url - - # We only yield - if block_given? - begin - yield results - ensure - self.logout - end - else - results - end - end - - # Logs out of the RETS server. - def logout() - # If no logout URL is provided, then we assume that logout is not necessary (not to - # mention impossible without a URL). We don't throw an exception, though, but we might - # want to if this becomes an issue in the future. - - request(@urls['Logout']) if @urls['Logout'] - end - - # Requests Metadata from the server. An optional type and id can be specified to request - # subsets of the Metadata. Please see the RETS specification for more details on this. - # The format variable tells the server which format to return the Metadata in. Unless you - # need the raw metadata in a specified format, you really shouldn't specify the format. - # - # If called with a block, yields the results and returns the value of the block, or - # returns the metadata directly. - def get_metadata(type = 'METADATA-SYSTEM', id = '*', format = 'COMPACT') - header = { - 'Accept' => 'text/xml,text/plain;q=0.5' - } - - data = { - 'Type' => type, - 'ID' => id, - 'Format' => format - } - - response = request(@urls['GetMetadata'], data, header) - - result = self.parse(response.body) - - if block_given? - yield result - else - result - end - end - - # Performs a GetObject transaction on the server. For details on the arguments, please see - # the RETS specification on GetObject requests. - # - # This method either returns an Array of DataObject instances, or yields each DataObject - # as it is created. If a block is given, the number of objects yielded is returned. - def get_object(resource, type, id, location = 1) #:yields: data_object - header = { - 'Accept' => mimemap.keys.join(',') - } - - data = { - 'Resource' => resource, - 'Type' => type, - 'ID' => id, - 'Location' => location.to_s - } - - response = request(@urls['GetObject'], data, header) - results = block_given? ? 0 : [] - - if response['content-type'].include?('multipart/parallel') - content_type = process_content_type(response['content-type']) + METHOD_GET = 'GET' + METHOD_POST = 'POST' + METHOD_HEAD = 'HEAD' - parts = response.body.split("\r\n--#{content_type['boundary']}") - parts.shift # Get rid of the initial boundary - - parts.each do |part| - (raw_header, raw_data) = part.split("\r\n\r\n") - - next unless raw_data - - data_header = process_header(raw_header) - data_object = DataObject.new(data_header, raw_data) - - if block_given? - yield data_object - results += 1 - else - results << data_object - end - end - else - info = { - 'content-type' => response['content-type'], # Compatibility shim. Deprecated. - 'Content-Type' => response['content-type'], - 'Object-ID' => response['Object-ID'], - 'Content-ID' => response['Content-ID'] - } - - if response['Transfer-Encoding'].to_s.downcase == "chunked" || response['Content-Length'].to_i > 100 then - data_object = DataObject.new(info, response.body) - if block_given? - yield data_object - results += 1 - else - results << data_object - end - end - end - - results - end - - # Peforms a RETS search transaction. Again, please see the RETS specification for details - # on what these parameters mean. The options parameter takes a hash of options that will - # added to the search statement. - def search(search_type, klass, query, options = false) - header = {} - - # Required Data - data = { - 'SearchType' => search_type, - 'Class' => klass, - 'Query' => query, - 'QueryType' => 'DMQL2', - 'Format' => 'COMPACT', - 'Count' => '0' - } - - # Options - #-- - # We might want to switch this to merge!, but I've kept it like this for now because it - # explicitly casts each value as a string prior to performing the search, so we find out now - # if can't force a value into the string context. I suppose it doesn't really matter when - # that happens, though... - #++ - options.each { |k,v| data[k] = v.to_s } if options - - response = request(@urls['Search'], data, header) - - results = self.parse(response.body) - - if block_given? - yield results - else - return results - end - end - - private - - def process_content_type(text) - content = {} - - field_start = text.index(';') + DEFAULT_METHOD = METHOD_GET + DEFAULT_RETRY = 2 + SUPPORTED_RETS_VERSIONS = ['1.5', '1.7'] + CAPABILITY_LIST = [ + 'Action', + 'ChangePassword', + 'GetObject', + 'Login', + 'LoginComplete', + 'Logout', + 'Search', + 'GetMetadata', + 'Update' + ] - content['content-type'] = text[0 ... field_start].strip - fields = text[field_start..-1] - - parts = text.split(';') - - parts.each do |part| - (name, value) = part.split('=') - - content[name.strip] = value ? value.strip : value - end - - content - end - - # Processes the HTTP header - #-- - # Could we switch over to using CGI for this? - #++ - def process_header(raw) - header = {} - - raw.each do |line| - (name, value) = line.split(':') - - header[name.strip] = value.strip if name && value - end - - header - end - - # Given a hash, it returns a URL encoded query string. - def create_query_string(hash) - parts = hash.map {|key,value| "#{CGI.escape(key)}=#{CGI.escape(value)}"} - return parts.join('&') - end - - # This is the primary transaction method, which the other public methods make use of. - # Given a url for the transaction (endpoint) it makes a request to the RETS server. - # - #-- - # This needs to be better documented, but for now please see the public transaction methods - # for how to make use of this method. - #++ - def request(url, data = {}, header = {}, method = @request_method, retry_auth = DEFAULT_RETRY) - headers, response = nil - begin - @semaphore.lock - - http = Net::HTTP.new(url.host, url.port) - - if logger && logger.debug? - http.set_debug_output HTTPDebugLogger.new(logger) - end - - http.start do |http| - begin - uri = url.path - - if ! data.empty? && method == METHOD_GET - uri += "?#{create_query_string(data)}" - end + # These are the response messages as defined in the RETS 1.5e2 and 1.7d6 specifications. + # Provided for convenience and are used by the HTTPError class to provide more useful + # messages. + RETS_HTTP_MESSAGES = { + '200' => 'Operation successful.', + '400' => 'The request could not be understood by the server due to malformed syntax.', + '401' => 'Either the header did not contain an acceptable Authorization or the ' + + 'username/password was invalid. The server response MUST include a ' + + 'WWW-Authenticate header field.', + '402' => 'The requested transaction requires a payment which could not be authorized.', + '403' => 'The server understood the request, but is refusing to fulfill it.', + '404' => 'The server has not found anything matching the Request-URI.', + '405' => 'The method specified in the Request-Line is not allowed for the resource ' + + 'identified by the Request-URI.', + '406' => 'The resource identified by the request is only capable of generating response ' + + 'entities which have content characteristics not acceptable according to the accept ' + + 'headers sent in the request.', + '408' => 'The client did not produce a request within the time that the server was prepared to wait.', + '411' => 'The server refuses to accept the request without a defined Content-Length.', + '412' => 'Transaction not permitted at this point in the session.', + '413' => 'The server is refusing to process a request because the request entity is larger than ' + + 'the server is willing or able to process.', + '414' => 'The server is refusing to service the request because the Request-URI is longer than ' + + 'the server is willing to interpret. This error usually only occurs for a GET method.', + '500' => 'The server encountered an unexpected condition which prevented it from fulfilling ' + + 'the request.', + '501' => 'The server does not support the functionality required to fulfill the request.', + '503' => 'The server is currently unable to handle the request due to a temporary overloading ' + + 'or maintenance of the server.', + '505' => 'The server does not support, or refuses to support, the HTTP protocol version that ' + + 'was used in the request message.', + } - headers = @headers - headers.merge(header) unless header.empty? - - @pre_request_block.call(self, http, headers) if @pre_request_block - - debug("Request headers: #{headers.inspect}") - - @semaphore.unlock - - post_data = data.map {|k,v| "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}" }.join('&') if method == METHOD_POST - response = method == METHOD_POST ? http.post(uri, post_data, headers) : - http.get(uri, headers) - - debug("Response headers: #{response.to_hash.inspect}") + attr_accessor :mimemap + attr_reader :format, :urls - @semaphore.lock - - if response.code == '401' - # Authentication is required - raise AuthRequired - elsif response.code.to_i >= 300 - # We have a non-successful response that we cannot handle - @semaphore.unlock if @semaphore.locked? - raise HTTPError.new(response) - else - cookies = [] - if set_cookies = response.get_fields('set-cookie') then - set_cookies.each do |cookie| - cookies << cookie.split(";").first - end - end - set_header('Cookie', cookies.join("; ")) unless cookies.empty? - set_header('RETS-Session-ID', response['RETS-Session-ID']) if response['RETS-Session-ID'] - end - rescue AuthRequired - @nc += 1 + # Constructor + # + # Requires the URL to the RETS server and takes an optional output format. The output format + # determines the type of data returned by the various RETS transaction methods. + def initialize(url, format = COMPACT_FORMAT) + @request_struct = RETS4R::Client::Requester.new + @format = format + @urls = RETS4R::Client::Links.from_login_url(url) - if retry_auth > 0 - retry_auth -= 1 - set_header('Authorization', Auth.authenticate(response, @username, @password, url.path, method, @headers['RETS-Request-ID'], get_user_agent, @nc)) - retry - else - @semaphore.unlock if @semaphore.locked? - raise LoginError.new(response.message) - end - end - - debug(response.body) - end - - @semaphore.unlock if @semaphore.locked? - - return response - - #rescue - #data = {"request" => headers, "body" => response.body} - #data["response"] = response.respond_to?(:headers) ? response.headers : response - #data = data.respond_to?(:to_yaml) ? data.to_yaml : data.inspect - #raise RETSException, "#{$!.message}\nRequest/Response Details:\n#{data}" - end - end - - # If an action URL is present in the URL capability list, it calls that action URL and returns the - # raw result. Throws a generic RETSException if it is unable to follow the URL. - def perform_action_url - begin - if @urls.has_key?('Action') - return request(@urls['Action'], {}, {}, METHOD_GET) - end - rescue - raise RETSException.new("Unable to follow action URL: '#{$!}'.") - end - end - - # Shorthand for sending debug messages to the logger if a logger is provided - def debug(message) - logger.debug(message) if logger - end - - # Provides a proxy class to allow for net/http to log its debug to the logger. - class HTTPDebugLogger - def initialize(logger) - @logger = logger - end - - def <<(data) - @logger.debug(data) - end - end - - #### Exceptions #### - - # This exception should be thrown when a generic client error is encountered. - class ClientException < Exception - end - - # This exception should be thrown when there is an error with the parser, which is - # considered a subcomponent of the RETS client. It also includes the XML data that - # that was being processed at the time of the exception. - class ParserException < ClientException - attr_accessor :file - end - - # The client does not currently support a specified action. - class Unsupported < ClientException - end - - # The HTTP response returned by the server indicates that there was an error processing - # the request and the client cannot continue on its own without intervention. - class HTTPError < ClientException - attr_accessor :http_response - - # Takes a HTTPResponse object - def initialize(http_response) - self.http_response = http_response - end - - # Shorthand for calling HTTPResponse#code - def code - http_response.code - end - - # Shorthand for calling HTTPResponse#message - def message - http_response.message - end - - # Returns the RETS specification message for the HTTP response code - def rets_message - Client::RETS_HTTP_MESSAGES[code] - end - - def to_s - "#{code} #{message}: #{rets_message}" - end - end - - # A general RETS level exception was encountered. This would include HTTP and RETS - # specification level errors as well as informative mishaps such as authentication being - # required for access. - class RETSException < Exception - end - - # There was a problem with logging into the RETS server. - class LoginError < RETSException - end - - # For internal client use only, it is thrown when the a RETS request is made but a password - # is prompted for. - class AuthRequired < RETSException - end - end -end + @request_method = DEFAULT_METHOD + + @response_parser = RETS4R::Client::ResponseParser.new + + self.mimemap = { + 'image/jpeg' => 'jpg', + 'image/gif' => 'gif' + } + + if block_given? + yield self + end + end + + # Assigns a block that will be called just before the request is sent. + # This block must accept three parameters: + # * self + # * Net::HTTP instance + # * Hash of headers + # + # The block's return value will be ignored. If you want to prevent the request + # to go through, raise an exception. + # + # == Example + # + # client = RETS4R::Client.new(...) + # # Make a new pre_request_block that calculates the RETS-UA-Authorization header. + # client.set_pre_request_block do |rets, http, headers| + # a1 = Digest::MD5.hexdigest([headers["User-Agent"], @password].join(":")) + # if headers.has_key?("Cookie") then + # cookie = headers["Cookie"].split(";").map(&:strip).select {|c| c =~ /rets-session-id/i} + # cookie = cookie ? cookie.split("=").last : "" + # else + # cookie = "" + # end + # + # parts = [a1, "", cookie, headers["RETS-Version"]] + # headers["RETS-UA-Authorization"] = "Digest " + Digest::MD5.hexdigest(parts.join(":")) + # end + def set_pre_request_block(&block) + @request_struct.pre_request_block = block + end + + # So very much delegated to the request struct + def set_header(name, value) + @request_struct.set_header(name, value) + end + + def get_header(name) + @request_struct.headers[name] + end + + def user_agent=(name) + @request_struct.set_header('User-Agent', name) + end + + def user_agent + @request_struct.user_agent + end + + def rets_version=(version) + @request_struct.rets_version = version + end + + def rets_version + @request_struct.rets_version + end + + def request_method=(method) + @request_method = method + @request_struct.method = method + end + + def request_method + @request_method + end + + def logger=(logger) + @logger = logger + @request_struct.logger = logger + end + + def logger + @logger + end + + #### RETS Transaction Methods #### + # + # Most of these transaction methods mirror the RETS specification methods, so if you are + # unsure what they mean, you should check the RETS specification. The latest version can be + # found at http://www.rets.org + + # Attempts to log into the server using the provided username and password. + # + # If called with a block, the results of the login action are yielded, + # and logout is called when the block returns. In that case, #login + # returns the block's value. If called without a block, returns the + # result. + # + # As specified in the RETS specification, the Action URL is called and + # the results made available in the #secondary_results accessor of the + # results object. + def login(username, password) #:yields: login_results + @request_struct.username = username + @request_struct.password = password + + # We are required to set the Accept header to this by the RETS 1.5 specification. + set_header('Accept', '*/*') + + response = request(@urls.login) + + # Parse response to get other URLS + results = @response_parser.parse_key_value(response.body) + + if (results.success?) + CAPABILITY_LIST.each do |capability| + next unless results.response[capability] + + uri = URI.parse(results.response[capability]) + + if uri.absolute? + @urls[capability] = uri + else + base = @urls.login.clone + base.path = results.response[capability] + @urls[capability] = base + end + end + + logger.debug("Capability URL List: #{@urls.inspect}") if logger + else + raise LoginError.new(response.message + "(#{results.reply_code}: #{results.reply_text})") + end + + # Perform the mandatory get request on the action URL. + results.secondary_response = perform_action_url + + # We only yield + if block_given? + begin + yield results + ensure + self.logout + end + else + results + end + end + + # Logs out of the RETS server. + def logout() + # If no logout URL is provided, then we assume that logout is not necessary (not to + # mention impossible without a URL). We don't throw an exception, though, but we might + # want to if this becomes an issue in the future. + + request(@urls.logout) if @urls.logout + end + + # Requests Metadata from the server. An optional type and id can be specified to request + # subsets of the Metadata. Please see the RETS specification for more details on this. + # The format variable tells the server which format to return the Metadata in. Unless you + # need the raw metadata in a specified format, you really shouldn't specify the format. + # + # If called with a block, yields the results and returns the value of the block, or + # returns the metadata directly. + def get_metadata(type = 'METADATA-SYSTEM', id = '*') + xml = download_metadata(type, id) + + result = @response_parser.parse_metadata(xml, @format) + + if block_given? + yield result + else + result + end + end + + def download_metadata(type, id) + header = { + 'Accept' => 'text/xml,text/plain;q=0.5' + } + + data = { + 'Type' => type, + 'ID' => id, + 'Format' => @format + } + + request(@urls.metadata, data, header).body + end + + # Performs a GetObject transaction on the server. For details on the arguments, please see + # the RETS specification on GetObject requests. + # + # This method either returns an Array of DataObject instances, or yields each DataObject + # as it is created. If a block is given, the number of objects yielded is returned. + # + # TODO: how much of this could we move over to WEBrick::HTTPRequest#parse? + def get_object(resource, type, id, location = false) #:yields: data_object + header = { + 'Accept' => mimemap.keys.join(',') + } + + data = { + 'Resource' => resource, + 'Type' => type, + 'ID' => id, + 'Location' => location ? '1' : '0' + } + + response = request(@urls.objects, data, header) + results = block_given? ? 0 : [] + + if response['content-type'] && response['content-type'].include?('text/xml') + # This probably means that there was an error. + # Response parser will likely raise an exception. + rr = @response_parser.parse_object_response(response.body) + return rr + elsif response['content-type'] && response['content-type'].include?('multipart/parallel') + content_type = process_content_type(response['content-type']) + +# TODO: log this +# puts "SPLIT ON #{content_type['boundary']}" + boundary = content_type['boundary'] + if boundary =~ /\s*'([^']*)\s*/ + boundary = $1 + end + parts = response.body.split("\r\n--#{boundary}") + + parts.shift # Get rid of the initial boundary + +# TODO: log this +# puts "GOT PARTS #{parts.length}" + + parts.each do |part| + (raw_header, raw_data) = part.split("\r\n\r\n") + +# TODO: log this +# puts raw_data.nil? + next unless raw_data + + data_header = process_header(raw_header) + data_object = DataObject.new(data_header, raw_data) + + if block_given? + yield data_object + results += 1 + else + results << data_object + end + end + else + info = { + 'content-type' => response['content-type'], # Compatibility shim. Deprecated. + 'Content-Type' => response['content-type'], + 'Object-ID' => response['Object-ID'], + 'Content-ID' => response['Content-ID'] + } + + if response['Transfer-Encoding'].to_s.downcase == "chunked" || response['Content-Length'].to_i > 100 then + data_object = DataObject.new(info, response.body) + if block_given? + yield data_object + results += 1 + else + results << data_object + end + end + end + + results + end + + # Peforms a RETS search transaction. Again, please see the RETS specification for details + # on what these parameters mean. The options parameter takes a hash of options that will + # added to the search statement. + def search(search_type, klass, query, options = false) + header = {} + + # Required Data + data = { + 'SearchType' => search_type, + 'Class' => klass, + 'Query' => query, + 'QueryType' => 'DMQL2', + 'Format' => format, + 'Count' => '0' + } + + # Options + #-- + # We might want to switch this to merge!, but I've kept it like this for now because it + # explicitly casts each value as a string prior to performing the search, so we find out now + # if can't force a value into the string context. I suppose it doesn't really matter when + # that happens, though... + #++ + options.each { |k,v| data[k] = v.to_s } if options + + response = request(@urls.search, data, header) + + # TODO: make parser configurable + results = RETS4R::Client::CompactNokogiriParser.new(response.body) + + if block_given? + results.each {|result| yield result} + else + return results.to_a + end + end + + def count(search_type, klass, query) + header = {} + data = { + 'SearchType' => search_type, + 'Class' => klass, + 'Query' => query, + 'QueryType' => 'DMQL2', + 'Format' => format, + 'Count' => '2' + } + response = request(@urls.search, data, header) + result = @response_parser.parse_count(response.body) + return result + end + + private + + # XXX: This is crap. It does not properly handle quotes. + def process_content_type(text) + content = {} + + field_start = text.index(';') + + content['content-type'] = text[0 ... field_start].strip + fields = text[field_start..-1] + + parts = text.split(';') + + parts.each do |part| + (name, value) = part.gsub(/\"/, '').split('=') + + content[name.strip] = value ? value.strip : value + end + + content + end + + # Processes the HTTP header + #-- + #++ + def process_header(raw) + # this util gives us arrays of values. We are only set up to handle one header value. + WEBrick::HTTPUtils.parse_header(raw.strip).map.inject({}) do |h,(k,v)| + h[k]=v.first; h + end + end + + # This is the primary transaction method, which the other public methods make use of. + # Given a url for the transaction (endpoint) it makes a request to the RETS server. + # + #-- + # This needs to be better documented, but for now please see the public transaction methods + # for how to make use of this method. + #++ + def request(url, data = {}, header = {}, method = @request_method, retry_auth = DEFAULT_RETRY) + @request_struct.request(url, data, header, method, retry_auth) + end + + # If an action URL is present in the URL capability list, it calls that action URL and returns the + # raw result. Throws a generic RETSException if it is unable to follow the URL. + def perform_action_url + begin + if @urls.has_key?('Action') + return request(@urls.action, {}, {}, METHOD_GET) + end + rescue + raise RETSException.new("Unable to follow action URL: '#{$!}'.") + end + end + + # Provides a proxy class to allow for net/http to log its debug to the logger. + class HTTPDebugLogger + def initialize(logger) + @logger = logger + end + + def <<(data) + @logger.debug(data) + end + end + end +end