module Sphinx
  # The Sphinx Client API is used to communicate with searchd
  # daemon and perform requests.
  #
  # @example
  #   sphinx = Sphinx::Client.new
  #   result = sphinx.query('test')
  #   ids = result['matches'].map { |match| match['id'] }
  #   posts = Post.all :conditions => { :id => ids },
  #                    :order => "FIELD(id,#{ids.join(',')})"
  #
  #   docs = posts.map(&:body)
  #   excerpts = sphinx.build_excerpts(docs, 'index', 'test')
  #
  class Client
    include Sphinx::Constants

    #=================================================================
    # Some internal attributes to use inside client API
    #=================================================================

    # List of searchd servers to connect to.
    # @private
    attr_reader :servers

    # Connection timeout in seconds.
    # @private
    attr_reader :timeout

    # Number of connection retries.
    # @private
    attr_reader :retries

    # Request timeout in seconds.
    # @private
    attr_reader :reqtimeout

    # Number of request retries.
    # @private
    attr_reader :reqretries

    # Log debug/info/warn to the given Logger, defaults to nil.
    # @private
    attr_reader :logger

    # Constructs the Sphinx::Client object and sets options
    # to their default values.
    #
    # @param [Logger] logger a logger object to put logs to. No logging
    #   will be performed when not set.
    #
    def initialize(logger = nil)
      # per-query settings
      @offset        = 0                       # how many records to seek from result-set start (default is 0)
      @limit         = 20                      # how many records to return from result-set starting at offset (default is 20)
      @mode          = SPH_MATCH_ALL           # query matching mode (default is SPH_MATCH_ALL)
      @weights       = []                      # per-field weights (default is 1 for all fields)
      @sort          = SPH_SORT_RELEVANCE      # match sorting mode (default is SPH_SORT_RELEVANCE)
      @sortby        = ''                      # attribute to sort by (default is "")
      @min_id        = 0                       # min ID to match (default is 0, which means no limit)
      @max_id        = 0                       # max ID to match (default is 0, which means no limit)
      @filters       = []                      # search filters
      @groupby       = ''                      # group-by attribute name
      @groupfunc     = SPH_GROUPBY_DAY         # function to pre-process group-by attribute value with
      @groupsort     = '@group desc'           # group-by sorting clause (to sort groups in result set with)
      @groupdistinct = ''                      # group-by count-distinct attribute
      @maxmatches    = 1000                    # max matches to retrieve
      @cutoff        = 0                       # cutoff to stop searching at (default is 0)
      @retrycount    = 0                       # distributed retries count
      @retrydelay    = 0                       # distributed retries delay
      @anchor        = []                      # geographical anchor point
      @indexweights  = []                      # per-index weights
      @ranker        = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
      @maxquerytime  = 0                       # max query time, milliseconds (default is 0, do not limit)
      @fieldweights  = {}                      # per-field-name weights
      @overrides     = []                      # per-query attribute values overrides
      @select        = '*'                     # select-list (attributes or expressions, with optional aliases)

      # per-reply fields (for single-query case)
      @error         = ''                      # last error message
      @warning       = ''                      # last warning message
      @connerror     = false                   # connection error vs remote error flag

      @reqs          = []                      # requests storage (for multi-query case)
      @mbenc         = ''                      # stored mbstring encoding
      @timeout       = 0                       # connect timeout
      @retries       = 1                       # number of connect retries in case of emergency
      @reqtimeout    = 0                       # request timeout
      @reqretries    = 1                       # number of request retries in case of emergency

      # per-client-object settings
      # searchd servers list
      @servers = [Sphinx::Server.new(self, 'localhost', 9312, false)].freeze
      @logger = logger

      logger.info { "[sphinx] version: #{VERSION}, #{@servers.inspect}" } if logger
    end

    # Returns a string representation of the sphinx client object.
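    #
    # @example Logging the client state (a sketch; assumes an external logger)
    #   logger.debug sphinx.inspect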
    #
    def inspect
      params = {
        :error => @error,
        :warning => @warning,
        :connect_error => @connerror,
        :servers => @servers,
        :connect_timeout => { :timeout => @timeout, :retries => @retries },
        :request_timeout => { :timeout => @reqtimeout, :retries => @reqretries },
        :retries => { :count => @retrycount, :delay => @retrydelay },
        :limits => { :offset => @offset, :limit => @limit, :max => @maxmatches, :cutoff => @cutoff },
        :max_query_time => @maxquerytime,
        :overrides => @overrides,
        :select => @select,
        :match_mode => @mode,
        :ranking_mode => @ranker,
        :sort_mode => { :mode => @sort, :sortby => @sortby },
        :weights => @weights,
        :field_weights => @fieldweights,
        :index_weights => @indexweights,
        :id_range => { :min => @min_id, :max => @max_id },
        :filters => @filters,
        :geo_anchor => @anchor,
        :group_by => { :attribute => @groupby, :func => @groupfunc, :sort => @groupsort },
        :group_distinct => @groupdistinct
      }

      "<Sphinx::Client: %d servers, params: %s>" % [@servers.length, params.inspect]
    end

    #=================================================================
    # General API functions
    #=================================================================

    # Returns last error message, as a string, in human readable format. If there
    # were no errors during the previous API call, empty string is returned.
    #
    # You should call it when any other function (such as {#query}) fails (typically,
    # the failing function returns false). The returned string will contain the
    # error description.
    #
    # The error message is not reset by this call; so you can safely call it
    # several times if needed.
    #
    # @return [String] last error message.
    #
    # @example
    #   puts sphinx.last_error
    #
    # @see #last_warning
    # @see #connect_error?
    #
    def last_error
      @error
    end
    alias :GetLastError :last_error

    # Returns last warning message, as a string, in human readable format. If there
    # were no warnings during the previous API call, empty string is returned.
    #
    # You should call it to verify whether your request (such as {#query}) was
    # completed but with warnings. For instance, search query against a distributed
    # index might complete successfully even if several remote agents timed out.
    # In that case, a warning message would be produced.
    #
    # The warning message is not reset by this call; so you can safely call it
    # several times if needed.
    #
    # @return [String] last warning message.
    #
    # @example
    #   puts sphinx.last_warning
    #
    # @see #last_error
    # @see #connect_error?
    #
    def last_warning
      @warning
    end
    alias :GetLastWarning :last_warning

    # Checks whether the last error was a network error on API side, or a
    # remote error reported by searchd. Returns true if the last connection
    # attempt to searchd failed on API side, false otherwise (if the error
    # was remote, or there were no connection attempts at all).
    #
    # @return [Boolean] the value indicating whether last error was a
    #   network error on API side.
    #
    # @example
    #   puts "Connection failed!" if sphinx.connect_error?
    #
    # @see #last_error
    # @see #last_warning
    #
    def connect_error?
      @connerror || false
    end
    alias :IsConnectError :connect_error?

    # Sets searchd host name and TCP port. All subsequent requests will
    # use the new host and port settings. Default +host+ and +port+ are
    # 'localhost' and 9312, respectively.
    #
    # Also, you can specify an absolute path to Sphinx's UNIX socket as +host+,
    # in this case pass port as +0+ or +nil+.
    #
    # @param [String] host the searchd host name or UNIX socket absolute path.
    # @param [Integer] port the searchd port number (could be any if UNIX
    #   socket path specified).
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_server('localhost', 9312)
    #   sphinx.set_server('/opt/sphinx/var/run/sphinx.sock')
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    # @see #set_servers
    # @see #set_connect_timeout
    # @see #set_request_timeout
    #
    def set_server(host, port = 9312)
      raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)

      path = nil
      # Check if UNIX socket should be used
      if host[0] == ?/
        path = host
      elsif host[0, 7] == 'unix://'
        path = host[7..-1]
      else
        raise ArgumentError, '"port" argument must be Integer' unless port.kind_of?(Integer)
      end

      host = port = nil unless path.nil?

      @servers = [Sphinx::Server.new(self, host, port, path)].freeze
      logger.info { "[sphinx] servers now: #{@servers.inspect}" } if logger
      self
    end
    alias :SetServer :set_server

    # Sets the list of searchd servers. Each subsequent request will use the
    # next server in the list (round-robin). In case of one server failure,
    # the request could be retried on another server (see {#set_connect_timeout}
    # and {#set_request_timeout}).
    #
    # Method accepts an +Array+ of +Hash+es, each of them should have :host
    # and :port (to connect to searchd through network) or :path
    # (an absolute path to UNIX socket) specified.
    #
    # @param [Array] servers an +Array+ of +Hash+ objects with servers parameters.
    # @option servers [String] :host the searchd host name or UNIX socket absolute path.
    # @option servers [String] :path the searchd UNIX socket absolute path.
    # @option servers [Integer] :port (9312) the searchd port number (skipped when UNIX
    #   socket path specified)
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_servers([
    #     { :host => 'browse01.local' }, # default port is 9312
    #     { :host => 'browse02.local', :port => 9312 },
    #     { :path => '/opt/sphinx/var/run/sphinx.sock' }
    #   ])
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    # @see #set_server
    # @see #set_connect_timeout
    # @see #set_request_timeout
    #
    def set_servers(servers)
      raise ArgumentError, '"servers" argument must be Array'     unless servers.kind_of?(Array)
      raise ArgumentError, '"servers" argument must be not empty' if servers.empty?

      @servers = servers.map do |server|
        raise ArgumentError, '"servers" argument must be Array of Hashes' unless server.kind_of?(Hash)

        server = server.with_indifferent_access

        host = server[:path] || server[:host]
        port = server[:port] || 9312
        path = nil
        raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)

        # Check if UNIX socket should be used
        if host[0] == ?/
          path = host
        elsif host[0, 7] == 'unix://'
          path = host[7..-1]
        else
          raise ArgumentError, '"port" argument must be Integer' unless port.kind_of?(Integer)
        end

        host = port = nil unless path.nil?

        Sphinx::Server.new(self, host, port, path)
      end.freeze
      logger.info { "[sphinx] servers now: #{@servers.inspect}" } if logger
      self
    end
    alias :SetServers :set_servers

    # Sets the time allowed to spend connecting to the server before giving up
    # and number of retries to perform.
    #
    # In the event of a failure to connect, an appropriate error code should
    # be returned back to the application in order for application-level error
    # handling to advise the user.
    #
    # When multiple servers are configured through the {#set_servers} method,
    # and +retries+ number is greater than 1, the library will try to connect
    # to another server. In case of a single configured server, it will try
    # to reconnect +retries+ times.
    #
    # Please note, this timeout will only be used for connection establishing, not
    # for regular API requests.
    #
    # @param [Integer] timeout a connection timeout in seconds.
    # @param [Integer] retries number of connect retries.
    # @return [Sphinx::Client] self.
    #
    # @example Set connection timeout to 1 second and number of retries to 5
    #   sphinx.set_connect_timeout(1, 5)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    # @see #set_server
    # @see #set_servers
    # @see #set_request_timeout
    #
    def set_connect_timeout(timeout, retries = 1)
      raise ArgumentError, '"timeout" argument must be Integer'        unless timeout.kind_of?(Integer)
      raise ArgumentError, '"retries" argument must be Integer'        unless retries.kind_of?(Integer)
      raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0

      @timeout = timeout
      @retries = retries
      self
    end
    alias :SetConnectTimeout :set_connect_timeout

    # Sets the time allowed to spend performing a request to the server before
    # giving up and number of retries to perform.
    #
    # In the event of a request failure, an appropriate error code should
    # be returned back to the application in order for application-level error
    # handling to advise the user.
    #
    # When multiple servers are configured through the {#set_servers} method,
    # and +retries+ number is greater than 1, the library will retry the request
    # on the same server (with a full reconnect). If the connection fails,
    # behavior depends on {#set_connect_timeout} settings.
    #
    # Please note, this timeout will only be used for performing the request,
    # not for establishing the connection.
    #
    # @param [Integer] timeout a request timeout in seconds.
    # @param [Integer] retries number of request retries.
    # @return [Sphinx::Client] self.
    #
    # @example Set request timeout to 1 second and number of retries to 5
    #   sphinx.set_request_timeout(1, 5)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    # @see #set_server
    # @see #set_servers
    # @see #set_connect_timeout
    #
    def set_request_timeout(timeout, retries = 1)
      raise ArgumentError, '"timeout" argument must be Integer'        unless timeout.kind_of?(Integer)
      raise ArgumentError, '"retries" argument must be Integer'        unless retries.kind_of?(Integer)
      raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0

      @reqtimeout = timeout
      @reqretries = retries
      self
    end
    alias :SetRequestTimeout :set_request_timeout

    # Sets distributed retry count and delay.
    #
    # On temporary failures searchd will attempt up to +count+ retries
    # per agent. +delay+ is the delay between the retries, in milliseconds.
    # Retries are disabled by default. Note that this call will not make
    # the API itself retry on temporary failure; it only tells searchd
    # to do so. Currently, the list of temporary failures includes all
    # kinds of connection failures and maxed out (too busy) remote agents.
    #
    # @param [Integer] count a number of retries to perform.
    # @param [Integer] delay a delay between the retries.
    # @return [Sphinx::Client] self.
    #
    # @example Perform 5 retries with 200 ms between them
    #   sphinx.set_retries(5, 200)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    # @see #set_connect_timeout
    # @see #set_request_timeout
    #
    def set_retries(count, delay = 0)
      raise ArgumentError, '"count" argument must be Integer' unless count.kind_of?(Integer)
      raise ArgumentError, '"delay" argument must be Integer' unless delay.kind_of?(Integer)

      @retrycount = count
      @retrydelay = delay
      self
    end
    alias :SetRetries :set_retries

    #=================================================================
    # General query settings
    #=================================================================

    # Sets offset into server-side result set (+offset+) and amount of matches to
    # return to client starting from that offset (+limit+). Can additionally control
    # maximum server-side result set size for current query (+max_matches+) and the
    # threshold amount of matches to stop searching at (+cutoff+). All parameters
    # must be non-negative integers.
    #
    # First two parameters to {#set_limits} are identical in behavior to MySQL LIMIT
    # clause. They instruct searchd to return at most +limit+ matches starting from
    # match number +offset+. The default offset and limit settings are +0+ and +20+,
    # that is, to return the first +20+ matches.
    #
    # +max_matches+ setting controls how many matches searchd will keep in RAM
    # while searching. All matching documents will be normally processed, ranked,
    # filtered, and sorted even if max_matches is set to +1+. But only the best +N+
    # documents are stored in memory at any given moment for performance and RAM
    # usage reasons, and this setting controls that N. Note that there are two
    # places where max_matches limit is enforced. Per-query limit is controlled
    # by this API call, but there also is per-server limit controlled by +max_matches+
    # setting in the config file. To prevent RAM usage abuse, server will not
    # allow setting the per-query limit higher than the per-server limit.
    #
    # You can't retrieve more than +max_matches+ matches to the client application.
    # The default limit is set to +1000+. Normally, you should not need to go over
    # this limit. One thousand records is enough to present to the end user.
    # And if you're thinking about pulling the results to application for further
    # sorting or filtering, that would be much more efficient if performed on
    # the Sphinx side.
    #
    # +cutoff+ setting is intended for advanced performance control. It tells
    # searchd to forcibly stop the search query once +cutoff+ matches have been
    # found and processed.
    #
    # @param [Integer] offset an offset into server-side result set.
    # @param [Integer] limit an amount of matches to return.
    # @param [Integer] max a maximum server-side result set size.
    # @param [Integer] cutoff a threshold amount of matches to stop searching at.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_limits(100, 50, 1000, 5000)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
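    #
    # @example Fetch the third page of 20 matches (a pagination sketch; offset is zero-based)
    #   sphinx.set_limits(40, 20)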
    #
    def set_limits(offset, limit, max = 0, cutoff = 0)
      raise ArgumentError, '"offset" argument must be Integer' unless offset.kind_of?(Integer)
      raise ArgumentError, '"limit" argument must be Integer'  unless limit.kind_of?(Integer)
      raise ArgumentError, '"max" argument must be Integer'    unless max.kind_of?(Integer)
      raise ArgumentError, '"cutoff" argument must be Integer' unless cutoff.kind_of?(Integer)

      raise ArgumentError, '"offset" argument should be greater or equal to zero' unless offset >= 0
      raise ArgumentError, '"limit" argument should be greater than zero'         unless limit > 0
      raise ArgumentError, '"max" argument should be greater or equal to zero'    unless max >= 0
      raise ArgumentError, '"cutoff" argument should be greater or equal to zero' unless cutoff >= 0

      @offset = offset
      @limit = limit
      @maxmatches = max if max > 0
      @cutoff = cutoff if cutoff > 0
      self
    end
    alias :SetLimits :set_limits

    # Sets maximum search query time, in milliseconds. Parameter must be a
    # non-negative integer. Default value is +0+ which means "do not limit".
    #
    # Similar to +cutoff+ setting from {#set_limits}, but limits elapsed query
    # time instead of processed matches count. Local search queries will be
    # stopped once that much time has elapsed. Note that if you're performing
    # a search which queries several local indexes, this limit applies to each
    # index separately.
    #
    # @param [Integer] max maximum search query time in milliseconds.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_max_query_time(200)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    def set_max_query_time(max)
      raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
      raise ArgumentError, '"max" argument should be greater or equal to zero' unless max >= 0

      @maxquerytime = max
      self
    end
    alias :SetMaxQueryTime :set_max_query_time

    # Sets temporary (per-query) per-document attribute value overrides. Only
    # supports scalar attributes. +values+ must be a +Hash+ that maps document
    # IDs to overridden attribute values.
    #
    # Override feature lets you "temporarily" update attribute values for some
    # documents within a single query, leaving all other queries unaffected.
    # This might be useful for personalized data. For example, assume you're
    # implementing a personalized search function that wants to boost the posts
    # that the user's friends recommend. Such data is not just dynamic, but
    # also personal; so you can't simply put it in the index because you don't
    # want everyone's searches affected. Overrides, on the other hand, are local
    # to a single query and invisible to everyone else. So you can, say, set up
    # a "friends_weight" value for every document, defaulting to 0, then
    # temporarily override it with 1 for documents 123, 456 and 789 (recommended
    # by exactly the friends of current user), and use that value when ranking.
    #
    # You can specify attribute type as String ("integer", "float", etc),
    # Symbol (:integer, :float, etc), or
    # Fixnum constant (SPH_ATTR_INTEGER, SPH_ATTR_FLOAT, etc).
    #
    # @param [String, Symbol] attribute an attribute name to override values of.
    # @param [Integer, String, Symbol] attrtype attribute type.
    # @param [Hash] values a +Hash+ that maps document IDs to overridden attribute values.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_override(:friends_weight, :integer, {123 => 1, 456 => 1, 789 => 1})
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
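    #
    # @example Overriding a timestamp attribute (a sketch; attribute name is hypothetical, Time values are accepted)
    #   sphinx.set_override(:published_at, :timestamp, { 123 => Time.now })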
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setoverride Section 6.2.3, "SetOverride"
    #
    def set_override(attribute, attrtype, values)
      raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)

      case attrtype
      when String, Symbol
        begin
          attrtype = self.class.const_get("SPH_ATTR_#{attrtype.to_s.upcase}")
        rescue NameError
          raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid"
        end
      when Fixnum
        raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid" unless (SPH_ATTR_INTEGER..SPH_ATTR_BIGINT).include?(attrtype)
      else
        raise ArgumentError, '"attrtype" argument must be Fixnum, String, or Symbol'
      end

      raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)

      values.each do |id, value|
        raise ArgumentError, '"values" argument keys must be Integer' unless id.kind_of?(Integer)
        case attrtype
        when SPH_ATTR_TIMESTAMP
          raise ArgumentError, '"values" argument must be Hash map of Integer to Integer or Time' unless value.kind_of?(Integer) or value.kind_of?(Time)
        when SPH_ATTR_FLOAT
          raise ArgumentError, '"values" argument must be Hash map of Integer to Numeric' unless value.kind_of?(Numeric)
        else
          # SPH_ATTR_INTEGER, SPH_ATTR_ORDINAL, SPH_ATTR_BOOL, SPH_ATTR_BIGINT
          raise ArgumentError, '"values" argument must be Hash map of Integer to Integer' unless value.kind_of?(Integer)
        end
      end

      @overrides << { 'attr' => attribute.to_s, 'type' => attrtype, 'values' => values }
      self
    end
    alias :SetOverride :set_override

    # Sets the select clause, listing specific attributes to fetch, and
    # expressions to compute and fetch. Clause syntax mimics SQL.
    #
    # {#set_select} is very similar to the part of a typical SQL query between
    # +SELECT+ and +FROM+. It lets you choose what attributes (columns) to
    # fetch, and also what expressions over the columns to compute and fetch.
    # A certain difference from SQL is that expressions must always be aliased
    # to a correct identifier (consisting of letters and digits) using the +AS+
    # keyword. SQL also lets you do that but does not require it. Sphinx enforces
    # aliases so that the computation results can always be returned under a
    # "normal" name in the result set, used in other clauses, etc.
    #
    # Everything else is basically identical to SQL. Star ('*') is supported.
    # Functions are supported. An arbitrary number of expressions is supported.
    # Computed expressions can be used for sorting, filtering, and grouping,
    # just as the regular attributes.
    #
    # Starting with version 0.9.9-rc2, aggregate functions (AVG(),
    # MIN(), MAX(), SUM()) are supported when using
    # GROUP BY.
    #
    # Expression sorting (Section 4.5, "SPH_SORT_EXPR mode") and geodistance
    # functions ({#set_geo_anchor}) are now internally implemented
    # using this computed expressions mechanism, using magic names '@expr'
    # and '@geodist' respectively.
    #
    # @param [String] select a select clause, listing specific attributes to fetch.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_select('*, @weight+(user_karma+ln(pageviews))*0.1 AS myweight')
    #   sphinx.set_select('exp_years, salary_gbp*{$gbp_usd_rate} AS salary_usd, IF(age>40,1,0) AS over40')
    #   sphinx.set_select('*, AVG(price) AS avgprice')
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#sort-expr Section 4.5, "SPH_SORT_EXPR mode"
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgeoanchor Section 6.4.5, "SetGeoAnchor"
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setselect Section 6.2.4, "SetSelect"
    #
    def set_select(select)
      raise ArgumentError, '"select" argument must be String' unless select.kind_of?(String)

      @select = select
      self
    end
    alias :SetSelect :set_select

    #=================================================================
    # Full-text search query settings
    #=================================================================

    # Sets full-text query matching mode.
    #
    # Parameter must be a +Fixnum+ constant specifying one of the known modes
    # (+SPH_MATCH_ALL+, +SPH_MATCH_ANY+, etc), +String+ with identifier ("all",
    # "any", etc), or a +Symbol+ (:all, :any, etc).
    #
    # @param [Integer, String, Symbol] mode full-text query matching mode.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_match_mode(Sphinx::SPH_MATCH_ALL)
    #   sphinx.set_match_mode(:all)
    #   sphinx.set_match_mode('all')
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#matching-modes Section 4.1, "Matching modes"
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setmatchmode Section 6.3.1, "SetMatchMode"
    #
    def set_match_mode(mode)
      case mode
      when String, Symbol
        begin
          mode = self.class.const_get("SPH_MATCH_#{mode.to_s.upcase}")
        rescue NameError
          raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
        end
      when Fixnum
        raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_MATCH_ALL..SPH_MATCH_EXTENDED2).include?(mode)
      else
        raise ArgumentError, '"mode" argument must be Fixnum, String, or Symbol'
      end

      @mode = mode
      self
    end
    alias :SetMatchMode :set_match_mode

    # Sets ranking mode. Only available in +SPH_MATCH_EXTENDED2+
    # matching mode at the time of this writing. Parameter must be a
    # constant specifying one of the known modes.
    #
    # You can specify ranking mode as String ("proximity_bm25", "bm25", etc),
    # Symbol (:proximity_bm25, :bm25, etc), or
    # Fixnum constant (SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, etc).
    #
    # @param [Integer, String, Symbol] ranker ranking mode.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_ranking_mode(Sphinx::SPH_RANK_BM25)
    #   sphinx.set_ranking_mode(:bm25)
    #   sphinx.set_ranking_mode('bm25')
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#matching-modes Section 4.1, "Matching modes"
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setmatchmode Section 6.3.1, "SetMatchMode"
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setrankingmode Section 6.3.2, "SetRankingMode"
    #
    def set_ranking_mode(ranker)
      case ranker
      when String, Symbol
        begin
          ranker = self.class.const_get("SPH_RANK_#{ranker.to_s.upcase}")
        rescue NameError
          raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid"
        end
      when Fixnum
        raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid" unless (SPH_RANK_PROXIMITY_BM25..SPH_RANK_SPH04).include?(ranker)
      else
        raise ArgumentError, '"ranker" argument must be Fixnum, String, or Symbol'
      end

      @ranker = ranker
      self
    end
    alias :SetRankingMode :set_ranking_mode

    # Sets matches sorting mode.
    #
    # You can specify sorting mode as String ("relevance", "attr_desc", etc),
    # Symbol (:relevance, :attr_desc, etc), or
    # Fixnum constant (SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, etc).
    #
    # @param [Integer, String, Symbol] mode matches sorting mode.
    # @param [String] sortby sorting clause, with the syntax depending on
    #   specific mode. Should be specified unless sorting mode is
    #   +SPH_SORT_RELEVANCE+.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_sort_mode(Sphinx::SPH_SORT_ATTR_ASC, 'attr')
    #   sphinx.set_sort_mode(:attr_asc, 'attr')
    #   sphinx.set_sort_mode('attr_asc', 'attr')
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#sorting-modes Section 4.5, "Sorting modes"
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setsortmode Section 6.3.3, "SetSortMode"
    #
    def set_sort_mode(mode, sortby = '')
      case mode
      when String, Symbol
        begin
          mode = self.class.const_get("SPH_SORT_#{mode.to_s.upcase}")
        rescue NameError
          raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
        end
      when Fixnum
        raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_SORT_RELEVANCE..SPH_SORT_EXPR).include?(mode)
      else
        raise ArgumentError, '"mode" argument must be Fixnum, String, or Symbol'
      end

      raise ArgumentError, '"sortby" argument must be String' unless sortby.kind_of?(String)
      raise ArgumentError, '"sortby" should not be empty unless mode is SPH_SORT_RELEVANCE' unless mode == SPH_SORT_RELEVANCE or !sortby.empty?

      @sort = mode
      @sortby = sortby
      self
    end
    alias :SetSortMode :set_sort_mode

    # Binds per-field weights in the order of appearance in the index.
    #
    # @param [Array] weights an +Array+ of integer per-field weights.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_weights([1, 3, 5])
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @deprecated Use {#set_field_weights} instead.
    # @see #set_field_weights
    #
    def set_weights(weights)
      raise ArgumentError, '"weights" argument must be Array' unless weights.kind_of?(Array)
      weights.each do |weight|
        raise ArgumentError, '"weights" argument must be Array of integers' unless weight.kind_of?(Integer)
      end

      @weights = weights
      self
    end
    alias :SetWeights :set_weights

    # Binds per-field weights by name. Parameter must be a +Hash+
    # mapping string field names to integer weights.
    #
    # Match ranking can be affected by per-field weights. For instance,
    # see Section 4.4, "Weighting" for an explanation of how phrase
    # proximity ranking is affected. This call lets you specify what
    # non-default weights to assign to different full-text fields.
    #
    # The weights must be positive 32-bit integers. The final weight
    # will be a 32-bit integer too. Default weight value is 1. Unknown
    # field names will be silently ignored.
    #
    # There is no enforced limit on the maximum weight value at the
    # moment. However, beware that if you set it too high you can
    # start hitting 32-bit wraparound issues. For instance, if
    # you set a weight of 10,000,000 and search in extended mode,
    # then the maximum possible weight will be equal to 10 million (your
    # weight) multiplied by 1 thousand (internal BM25 scaling factor, see
    # Section 4.4, "Weighting") multiplied by 1 or more (phrase proximity rank).
    # The result is at least 10 billion, which does not fit in 32 bits
    # and will be wrapped around, producing unexpected results.
    #
    # @param [Hash] weights a +Hash+ mapping string field names to
    #   integer weights.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_field_weights(:title => 20, :text => 10)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#weighting Section 4.4, "Weighting"
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfieldweights Section 6.3.5, "SetFieldWeights"
    #
    def set_field_weights(weights)
      raise ArgumentError, '"weights" argument must be Hash' unless weights.kind_of?(Hash)
      weights.each do |name, weight|
        unless (name.kind_of?(String) or name.kind_of?(Symbol)) and weight.kind_of?(Integer)
          raise ArgumentError, '"weights" argument must be Hash map of strings to integers'
        end
      end

      @fieldweights = weights
      self
    end
    alias :SetFieldWeights :set_field_weights

    # Sets per-index weights, and enables weighted summing of match
    # weights across different indexes. Parameter must be a hash
    # (associative array) mapping string index names to integer
    # weights. Default is empty, which disables weighted summing.
    #
    # When a match with the same document ID is found in several
    # different local indexes, by default Sphinx simply chooses the
    # match from the index specified last in the query. This is to
    # support searching through partially overlapping index partitions.
    #
    # However in some cases the indexes are not just partitions,
    # and you might want to sum the weights across the indexes
    # instead of picking one. {#set_index_weights} lets you do that.
    # With summing enabled, final match weight in result set will be
    # computed as a sum of match weight coming from the given index
    # multiplied by respective per-index weight specified in this
    # call. I.e. if document 123 is found in index A with the
    # weight of 2, and also in index B with the weight of 3, and
    # you called {#set_index_weights} with {"A"=>100, "B"=>10},
    # the final weight returned to the client will be 2*100+3*10 = 230.
    #
    # @param [Hash] weights a +Hash+ mapping string index names to
    #   integer weights.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_index_weights(:fresh => 20, :archived => 10)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setindexweights Section 6.3.6, "SetIndexWeights"
    #
    def set_index_weights(weights)
      raise ArgumentError, '"weights" argument must be Hash' unless weights.kind_of?(Hash)
      weights.each do |index, weight|
        unless (index.kind_of?(String) or index.kind_of?(Symbol)) and weight.kind_of?(Integer)
          raise ArgumentError, '"weights" argument must be Hash map of strings to integers'
        end
      end

      @indexweights = weights
      self
    end
    alias :SetIndexWeights :set_index_weights

    #=================================================================
    # Result set filtering settings
    #=================================================================

    # Sets an accepted range of document IDs. Parameters must be integers.
    # Defaults are 0 and 0; that combination means to not limit by range.
    #
    # After this call, only those records that have document ID between
    # +min+ and +max+ (including IDs exactly equal to +min+ or +max+)
    # will be matched.
    #
    # @param [Integer] min min document ID.
    # @param [Integer] max max document ID.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_id_range(10, 1000)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setidrange Section 6.4.1, "SetIDRange"
    #
    def set_id_range(min, max)
      raise ArgumentError, '"min" argument must be Integer' unless min.kind_of?(Integer)
      raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
      raise ArgumentError, '"max" argument must be greater or equal to "min"' unless min <= max

      @min_id = min
      @max_id = max
      self
    end
    alias :SetIDRange :set_id_range

    # Adds new integer values set filter.
    #
    # On this call, an additional new filter is added to the existing
    # list of filters. +attribute+ must be a string with attribute
    # name. +values+ must be a plain array containing integer
    # values. +exclude+ must be a boolean value; it controls
    # whether to accept the matching documents (default mode, when
    # +exclude+ is +false+) or reject them.
    #
    # Only those documents where +attribute+ column value stored in
    # the index matches any of the values from +values+ array will
    # be matched (or rejected, if +exclude+ is +true+).
    #
    # @param [String, Symbol] attribute an attribute name to filter by.
    # @param [Array, Integer] values an +Array+ of integers or
    #   single Integer with given attribute values.
    # @param [Boolean] exclude indicating whether documents with given attribute
    #   matching specified values should be excluded from search results.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_filter(:group_id, [10, 15, 20])
    #   sphinx.set_filter(:group_id, [10, 15, 20], true)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilter Section 6.4.2, "SetFilter"
    # @see #set_filter_range
    # @see #set_filter_float_range
    #
    def set_filter(attribute, values, exclude = false)
      raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
      values = [values] if values.kind_of?(Integer)
      raise ArgumentError, '"values" argument must be Array'             unless values.kind_of?(Array)
      raise ArgumentError, '"values" argument must be Array of Integers' unless values.all? { |v| v.kind_of?(Integer) }
      raise ArgumentError, '"exclude" argument must be Boolean'          unless [TrueClass, FalseClass].include?(exclude.class)

      if values.any?
        @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute.to_s, 'exclude' => exclude, 'values' => values }
      end
      self
    end
    alias :SetFilter :set_filter

    # Adds new integer range filter.
    #
    # On this call, an additional new filter is added to the existing
    # list of filters. +attribute+ must be a string with attribute
    # name. +min+ and +max+ must be integers that define the acceptable
    # attribute values range (including the boundaries). +exclude+
    # must be a boolean value; it controls whether to accept the
    # matching documents (default mode, when +exclude+ is false) or
    # reject them.
    #
    # Only those documents where +attribute+ column value stored
    # in the index is between +min+ and +max+ (including values
    # that are exactly equal to +min+ or +max+) will be matched
    # (or rejected, if +exclude+ is true).
    #
    # @param [String, Symbol] attribute an attribute name to filter by.
    # @param [Integer] min min value of the given attribute.
    # @param [Integer] max max value of the given attribute.
    # @param [Boolean] exclude indicating whether documents with given attribute
    #   matching specified boundaries should be excluded from search results.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_filter_range(:group_id, 10, 20)
    #   sphinx.set_filter_range(:group_id, 10, 20, true)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilterrange Section 6.4.3, "SetFilterRange"
    # @see #set_filter
    # @see #set_filter_float_range
    #
    def set_filter_range(attribute, min, max, exclude = false)
      raise ArgumentError, '"attribute" argument must be String or Symbol'    unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
      raise ArgumentError, '"min" argument must be Integer'                   unless min.kind_of?(Integer)
      raise ArgumentError, '"max" argument must be Integer'                   unless max.kind_of?(Integer)
      raise ArgumentError, '"max" argument must be greater or equal to "min"' unless min <= max
      raise ArgumentError, '"exclude" argument must be Boolean'               unless exclude.kind_of?(TrueClass) or exclude.kind_of?(FalseClass)

      @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute.to_s, 'exclude' => exclude, 'min' => min, 'max' => max }
      self
    end
    alias :SetFilterRange :set_filter_range

    # Adds new float range filter.
    #
    # On this call, an additional new filter is added to the existing
    # list of filters. +attribute+ must be a string with attribute name.
    # +min+ and +max+ must be floats that define the acceptable
    # attribute values range (including the boundaries). +exclude+ must
    # be a boolean value; it controls whether to accept the matching
    # documents (default mode, when +exclude+ is false) or reject them.
    #
    # Only those documents where +attribute+ column value stored in
    # the index is between +min+ and +max+ (including values that are
    # exactly equal to +min+ or +max+) will be matched (or rejected,
    # if +exclude+ is true).
    #
    # @param [String, Symbol] attribute an attribute name to filter by.
    # @param [Numeric] min min value of the given attribute.
    # @param [Numeric] max max value of the given attribute.
    # @param [Boolean] exclude indicating whether documents with given attribute
    #   matching specified boundaries should be excluded from search results.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_filter_float_range(:group_id, 10.5, 20)
    #   sphinx.set_filter_float_range(:group_id, 10.5, 20, true)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilterfloatrange Section 6.4.4, "SetFilterFloatRange"
    # @see #set_filter
    # @see #set_filter_range
    #
    def set_filter_float_range(attribute, min, max, exclude = false)
      raise ArgumentError, '"attribute" argument must be String or Symbol'    unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
      raise ArgumentError, '"min" argument must be Numeric'                   unless min.kind_of?(Numeric)
      raise ArgumentError, '"max" argument must be Numeric'                   unless max.kind_of?(Numeric)
      raise ArgumentError, '"max" argument must be greater or equal to "min"' unless min <= max
      raise ArgumentError, '"exclude" argument must be Boolean'               unless exclude.kind_of?(TrueClass) or exclude.kind_of?(FalseClass)

      @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute.to_s, 'exclude' => exclude, 'min' => min.to_f, 'max' => max.to_f }
      self
    end
    alias :SetFilterFloatRange :set_filter_float_range

    # Sets anchor point for geosphere distance (geodistance)
    # calculations, and enables them.
    #
    # +attrlat+ and +attrlong+ must be strings that contain the names
    # of latitude and longitude attributes, respectively. +lat+ and
    # +long+ are floats that specify anchor point latitude and
    # longitude, in radians.
    #
    # Once an anchor point is set, you can use magic "@geodist"
    # attribute name in your filters and/or sorting expressions.
    # Sphinx will compute geosphere distance between the given anchor
    # point and a point specified by latitude and longitude attributes
    # from each full-text match, and attach this value to the resulting
    # match. The latitude and longitude values both in {#set_geo_anchor}
    # and the index attribute data are expected to be in radians.
    # The result will be returned in meters, so geodistance value of
    # 1000.0 means 1 km. 1 mile is approximately 1609.344 meters.
    #
    # @param [String, Symbol] attrlat a name of latitude attribute.
    # @param [String, Symbol] attrlong a name of longitude attribute.
    # @param [Numeric] lat an anchor point latitude, in radians.
    # @param [Numeric] long an anchor point longitude, in radians.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_geo_anchor(:latitude, :longitude, 192.5, 143.5)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgeoanchor Section 6.4.5, "SetGeoAnchor"
    #
    def set_geo_anchor(attrlat, attrlong, lat, long)
      raise ArgumentError, '"attrlat" argument must be String or Symbol'  unless attrlat.kind_of?(String) or attrlat.kind_of?(Symbol)
      raise ArgumentError, '"attrlong" argument must be String or Symbol' unless attrlong.kind_of?(String) or attrlong.kind_of?(Symbol)
      raise ArgumentError, '"lat" argument must be Numeric'               unless lat.kind_of?(Numeric)
      raise ArgumentError, '"long" argument must be Numeric'              unless long.kind_of?(Numeric)

      @anchor = { 'attrlat' => attrlat.to_s, 'attrlong' => attrlong.to_s, 'lat' => lat.to_f, 'long' => long.to_f }
      self
    end
    alias :SetGeoAnchor :set_geo_anchor

    #=================================================================
    # GROUP BY settings
    #=================================================================

    # Sets grouping attribute, function, and groups sorting mode; and
    # enables grouping (as described in Section 4.6, "Grouping (clustering) search results").
    #
    # +attribute+ is a string that contains group-by attribute name.
    # +func+ is a constant that chooses a function applied to the
    # attribute value in order to compute group-by key. +groupsort+
    # is a clause that controls how the groups will be sorted. Its
    # syntax is similar to that described in Section 4.5,
    # "SPH_SORT_EXTENDED mode".
    #
    # Grouping feature is very similar in nature to GROUP BY clause
    # from SQL. Results produced by this function call are going to
    # be the same as produced by the following pseudo code:
    #
    #   SELECT ... GROUP BY func(attribute) ORDER BY groupsort
    #
    # Note that it's +groupsort+ that affects the order of matches in
    # the final result set. Sorting mode (see {#set_sort_mode}) affects
    # the ordering of matches within each group, i.e. which match will be
    # selected as the best one from the group. So you can for instance
    # order the groups by matches count and select the most relevant
    # match within each group at the same time.
    #
    # Starting with version 0.9.9-rc2, aggregate functions (AVG(),
    # MIN(), MAX(), SUM()) are supported
    # through {#set_select} API call when using GROUP BY.
    #
    # You can specify group function and attribute as String
    # ("attr", "day", etc), Symbol (:attr, :day, etc), or
    # Fixnum constant (SPH_GROUPBY_ATTR, SPH_GROUPBY_DAY, etc).
    #
    # @param [String, Symbol] attribute an attribute name to group by.
    # @param [Integer, String, Symbol] func a grouping function.
    # @param [String] groupsort a groups sorting mode.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_group_by(:tag_id, :attr)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#clustering Section 4.6, "Grouping (clustering) search results"
    # @see http://www.sphinxsearch.com/docs/current.html#sort-extended Section 4.5, "SPH_SORT_EXTENDED mode"
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgroupby Section 6.5.1, "SetGroupBy"
    # @see #set_sort_mode
    # @see #set_select
    # @see #set_group_distinct
    #
    def set_group_by(attribute, func, groupsort = '@group desc')
      raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
      raise ArgumentError, '"groupsort" argument must be String'           unless groupsort.kind_of?(String)

      case func
      when String, Symbol
        begin
          func = self.class.const_get("SPH_GROUPBY_#{func.to_s.upcase}")
        rescue NameError
          raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid"
        end
      when Fixnum
        raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid" unless (SPH_GROUPBY_DAY..SPH_GROUPBY_ATTRPAIR).include?(func)
      else
        raise ArgumentError, '"func" argument must be Fixnum, String, or Symbol'
      end

      @groupby = attribute.to_s
      @groupfunc = func
      @groupsort = groupsort
      self
    end
    alias :SetGroupBy :set_group_by

    # Sets attribute name for per-group distinct values count
    # calculations. Only available for grouping queries.
    #
    # +attribute+ is a string that contains the attribute name. For
    # each group, all values of this attribute will be stored (as
    # RAM limits permit), then the amount of distinct values will
    # be calculated and returned to the client. This feature is
    # similar to COUNT(DISTINCT) clause in standard SQL;
    # so these Sphinx calls:
    #
    #   sphinx.set_group_by(:category, :attr, '@count desc')
    #   sphinx.set_group_distinct(:vendor)
    #
    # can be expressed using the following SQL clauses:
    #
    #   SELECT id, weight, all-attributes,
    #     COUNT(DISTINCT vendor) AS @distinct,
    #     COUNT(*) AS @count
    #   FROM products
    #   GROUP BY category
    #   ORDER BY @count DESC
    #
    # In the sample pseudo code shown just above, {#set_group_distinct}
    # call corresponds to COUNT(DISTINCT vendor) clause only.
    # GROUP BY, ORDER BY, and COUNT(*)
    # clauses are all an equivalent of {#set_group_by} settings. Both
    # queries will return one matching row for each category. In
    # addition to indexed attributes, matches will also contain
    # total per-category matches count, and the count of distinct
    # vendor IDs within each category.
    #
    # @param [String, Symbol] attribute an attribute name.
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.set_group_distinct(:category_id)
    #
    # @raise [ArgumentError] Occurred when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgroupdistinct Section 6.5.2, "SetGroupDistinct"
    # @see #set_group_by
    #
    def set_group_distinct(attribute)
      raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)

      @groupdistinct = attribute.to_s
      self
    end
    alias :SetGroupDistinct :set_group_distinct

    #=================================================================
    # Querying
    #=================================================================

    # Clears all currently set filters.
    #
    # This call is only normally required when using multi-queries. You might want
    # to set different filters for different queries in the batch. To do that,
    # you should call {#reset_filters} and add new filters using the respective calls.
    #
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.reset_filters
    #
    # @see #set_filter
    # @see #set_filter_range
    # @see #set_filter_float_range
    # @see #set_geo_anchor
    #
    def reset_filters
      @filters = []
      @anchor = []
      self
    end
    alias :ResetFilters :reset_filters

    # Clears all currently set group-by settings, and disables group-by.
    #
    # This call is only normally required when using multi-queries. You can
    # change individual group-by settings using {#set_group_by} and {#set_group_distinct}
    # calls, but you can not disable group-by using those calls. {#reset_group_by}
    # fully resets previous group-by settings and disables group-by mode in the
    # current state, so that subsequent {#add_query} calls can perform non-grouping
    # searches.
    #
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.reset_group_by
    #
    # @see #set_group_by
    # @see #set_group_distinct
    #
    def reset_group_by
      @groupby = ''
      @groupfunc = SPH_GROUPBY_DAY
      @groupsort = '@group desc'
      @groupdistinct = ''
      self
    end
    alias :ResetGroupBy :reset_group_by

    # Clears all attribute value overrides (for multi-queries).
    #
    # This call is only normally required when using multi-queries. You might want
    # to set field overrides for different queries in the batch. To do that,
    # you should call {#reset_overrides} and add new overrides using the
    # respective calls.
    #
    # @return [Sphinx::Client] self.
    #
    # @example
    #   sphinx.reset_overrides
    #
    # @see #set_override
    #
    def reset_overrides
      @overrides = []
      self
    end
    alias :ResetOverrides :reset_overrides

    # Connects to searchd server, runs given search query with
    # current settings, obtains and returns the result set.
    #
    # +query+ is a query string. +index+ is an index name (or names)
    # string. Returns false and sets {#last_error} message on general
    # error. Returns search result set on success. Additionally,
    # the contents of +comment+ are sent to the query log, marked in
    # square brackets, just before the search terms, which can be very
    # useful for debugging. Currently, the comment is limited to 128
    # characters.
    #
    # Default value for +index+ is "*" which means to query
    # all local indexes. Characters allowed in index names include
    # Latin letters (a-z), numbers (0-9), minus sign (-), and
    # underscore (_); everything else is considered a separator.
    # Therefore, all of the following sample calls are valid and
    # will search the same two indexes:
    #
    #   sphinx.query('test query', 'main delta')
    #   sphinx.query('test query', 'main;delta')
    #   sphinx.query('test query', 'main, delta')
    #
    # Index specification order matters. If documents with identical
    # IDs are found in two or more indexes, weight and attribute
    # values from the very last matching index will be used for
    # sorting and returning to client (unless explicitly overridden
    # with {#set_index_weights}). Therefore, in the example above,
    # matches from "delta" index will always win over matches
    # from "main".
    #
    # On success, {#query} returns a result set that contains some
    # of the found matches (as requested by {#set_limits}) and
    # additional general per-query statistics. The result set
    # is a +Hash+ with the following keys and values:
    #
    # "matches"::
    #   Array with small +Hash+es containing document weight and
    #   attribute values.
    # "total"::
    #   Total amount of matches retrieved on server (i.e. in the server-side
    #   result set) by this query. You can retrieve up to this
    #   amount of matches from server for this query text with current
    #   query settings.
    # "total_found"::
    #   Total amount of matching documents in index (that were found
    #   and processed on server).
    # "words"::
    #   Hash which maps query keywords (case-folded, stemmed, and
    #   otherwise processed) to a small Hash with per-keyword statistics
    #   ("docs", "hits").
    # "error"::
    #   Query error message reported by searchd (string, human readable).
    #   Empty if there were no errors.
    # "warning"::
    #   Query warning message reported by searchd (string, human readable).
    #   Empty if there were no warnings.
    #
    # Please note: you can use both strings and symbols as Hash keys.
    #
    # It should be noted that {#query} carries out the same actions as
    # {#add_query} and {#run_queries} without the intermediate steps; it
    # is analogous to a single {#add_query} call, followed by a
    # corresponding {#run_queries}, then returning the first array
    # element of matches (from the first, and only, query).
    #
    # @param [String] query a query string.
    # @param [String] index an index name (or names).
    # @param [String] comment a comment to be sent to the query log.
    # @return [Hash, false] result set described above or +false+ on error.
    # @yield [Client] yields just before the query is performed. Useful to set
    #   filters or sortings. When block does not accept any parameters, it
    #   will be eval'ed inside {Client} instance itself. In this case you
    #   can omit +set_+ prefix for configuration methods.
    # @yieldparam [Client] sphinx self.
    #
    # @example Regular query with previously set filters
    #   sphinx.query('some search text', '*', 'search page')
    # @example Query with block
    #   sphinx.query('test') do |sphinx|
    #     sphinx.set_match_mode :all
    #     sphinx.set_id_range 10, 100
    #   end
    # @example Query with instant filters configuring
    #   sphinx.query('test') do
    #     match_mode :all
    #     id_range 10, 100
    #   end
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-query Section 6.6.1, "Query"
    # @see #add_query
    # @see #run_queries
    #
    def query(query, index = '*', comment = '', &block)
      @reqs = []

      if block_given?
        if block.arity > 0
          yield self
        else
          begin
            @inside_eval = true
            instance_eval(&block)
          ensure
            @inside_eval = false
          end
        end
      end

      logger.debug { "[sphinx] query('#{query}', '#{index}', '#{comment}'), #{self.inspect}" } if logger

      self.add_query(query, index, comment, false)
      results = self.run_queries

      # probably network error; error message should be already filled
      return false unless results.instance_of?(Array)

      @error = results[0]['error']
      @warning = results[0]['warning']

      return false if results[0]['status'] == SEARCHD_ERROR
      return results[0]
    end
    alias :Query :query

    # Adds additional query with current settings to multi-query batch.
    # +query+ is a query string. +index+ is an index name (or names)
    # string. Additionally if provided, the contents of +comment+ are
    # sent to the query log, marked in square brackets, just before
    # the search terms, which can be very useful for debugging.
    # Currently, this is limited to 128 characters. Returns index
    # to results array returned from {#run_queries}.
    #
    # Batch queries (or multi-queries) enable searchd to perform
    # internal optimizations if possible. They also reduce network
    # connection overheads and search process creation overheads in all
    # cases. They do not result in any additional overheads compared
    # to simple queries. Thus, if you run several different queries
    # from your web page, you should always consider using multi-queries.
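    #
    # A minimal sketch of such a batch (index and attribute names are assumed):
    #
    #   sphinx.add_query('ipod', 'products')     # plain search results
    #   sphinx.set_group_by(:vendor_id, :attr)
    #   sphinx.add_query('ipod', 'products')     # same search, grouped per vendor
    #   results = sphinx.run_queries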
    #
    # For instance, running the same full-text query but with different
    # sorting or group-by settings will enable searchd to perform
    # expensive full-text search and ranking operation only once, but
    # compute multiple group-by results from its output.
    #
    # This can be a big saver when you need to display not just plain
    # search results but also some per-category counts, such as the
    # amount of products grouped by vendor. Without multi-query, you
    # would have to run several queries which perform essentially the
    # same search and retrieve the same matches, but create result
    # sets differently. With multi-query, you simply pass all these
    # queries in a single batch and Sphinx optimizes the redundant
    # full-text search internally.
    #
    # {#add_query} internally saves full current settings state along
    # with the query, and you can safely change them afterwards for
    # subsequent {#add_query} calls. Already added queries will not
    # be affected; there's actually no way to change them at all.
    # Here's an example:
    #
    #   sphinx.set_sort_mode(:relevance)
    #   sphinx.add_query("hello world", "documents")
    #
    #   sphinx.set_sort_mode(:attr_desc, 'price')
    #   sphinx.add_query("ipod", "products")
    #
    #   sphinx.add_query("harry potter", "books")
    #
    #   results = sphinx.run_queries
    #
    # With the code above, 1st query will search for "hello world"
    # in "documents" index and sort results by relevance, 2nd query
    # will search for "ipod" in "products" index and sort results
    # by price, and 3rd query will search for "harry potter" in
    # "books" index while still sorting by price. Note that 2nd
    # {#set_sort_mode} call does not affect the first query (because
    # it's already added) but affects both other subsequent queries.
    #
    # Additionally, any filters set up before an {#add_query} will
    # fall through to subsequent queries. So, if {#set_filter} is
    # called before the first query, the same filter will be in
    # place for the second (and subsequent) queries batched through
    # {#add_query} unless you call {#reset_filters} first. Alternatively,
    # you can add additional filters as well.
    #
    # This would also be true for grouping options and sorting options;
    # no current sorting, filtering, and grouping settings are affected
    # by this call; so subsequent queries will reuse current query settings.
    #
    # {#add_query} returns an index into an array of results that will
    # be returned from {#run_queries} call. It is simply a sequentially
    # increasing 0-based integer, i.e. first call will return 0, second
    # will return 1, and so on. Just a small helper so you won't have
    # to track the indexes manually if you need them.
    #
    # @param [String] query a query string.
    # @param [String] index an index name (or names).
    # @param [String] comment a comment to be sent to the query log.
    # @param [Boolean] log indicating whether this call should be logged.
    # @return [Integer] an index into an array of results that will
    #   be returned from {#run_queries} call.
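    #
    # @example Using the returned indexes (a sketch; index names are assumed)
    #   first   = sphinx.add_query('hello', 'documents')
    #   second  = sphinx.add_query('world', 'documents')
    #   results = sphinx.run_queries
    #   results[first]['total']   # matches count for the first query
    #   results[second]['total']  # matches count for the second query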
    #
    # @example
    #   sphinx.add_query('some search text', '*', 'search page')
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-addquery Section 6.6.2, "AddQuery"
    # @see #query
    # @see #run_queries
    #
    def add_query(query, index = '*', comment = '', log = true)
      logger.debug { "[sphinx] add_query('#{query}', '#{index}', '#{comment}'), #{self.inspect}" } if log and logger

      # build request

      # mode and limits
      request = Request.new
      request.put_int @offset, @limit, @mode, @ranker, @sort
      request.put_string @sortby
      # query itself
      request.put_string query
      # weights
      request.put_int_array @weights
      # indexes
      request.put_string index
      # id64 range marker
      request.put_int 1
      # id64 range
      request.put_int64 @min_id.to_i, @max_id.to_i
      # filters
      request.put_int @filters.length
      @filters.each do |filter|
        request.put_string filter['attr']
        request.put_int filter['type']

        case filter['type']
          when SPH_FILTER_VALUES
            request.put_int64_array filter['values']
          when SPH_FILTER_RANGE
            request.put_int64 filter['min'], filter['max']
          when SPH_FILTER_FLOATRANGE
            request.put_float filter['min'], filter['max']
          else
            raise SphinxInternalError, 'Internal error: unhandled filter type'
        end
        request.put_int filter['exclude'] ? 1 : 0
      end
      # group-by clause, max-matches count, group-sort clause, cutoff count
      request.put_int @groupfunc
      request.put_string @groupby
      request.put_int @maxmatches
      request.put_string @groupsort
      request.put_int @cutoff, @retrycount, @retrydelay
      request.put_string @groupdistinct
      # anchor point
      if @anchor.empty?
        request.put_int 0
      else
        request.put_int 1
        request.put_string @anchor['attrlat'], @anchor['attrlong']
        request.put_float @anchor['lat'], @anchor['long']
      end
      # per-index weights
      request.put_int @indexweights.length
      @indexweights.each do |idx, weight|
        request.put_string idx.to_s
        request.put_int weight
      end
      # max query time
      request.put_int @maxquerytime
      # per-field weights
      request.put_int @fieldweights.length
      @fieldweights.each do |field, weight|
        request.put_string field.to_s
        request.put_int weight
      end
      # comment
      request.put_string comment
      # attribute overrides
      request.put_int @overrides.length
      @overrides.each do |entry|
        request.put_string entry['attr']
        request.put_int entry['type'], entry['values'].size
        entry['values'].each do |id, val|
          request.put_int64 id
          case entry['type']
            when SPH_ATTR_FLOAT
              request.put_float val.to_f
            when SPH_ATTR_BIGINT
              request.put_int64 val.to_i
            else
              request.put_int val.to_i
          end
        end
      end
      # select-list
      request.put_string @select

      # store request to requests array
      @reqs << request.to_s
      return @reqs.length - 1
    end
    alias :AddQuery :add_query

    # Connects to searchd, runs a batch of all queries added using
    # {#add_query}, obtains and returns the result sets. Returns
    # +false+ and sets the {#last_error} message on general error
    # (such as network I/O failure). Returns a plain array of
    # result sets on success.
    #
    # Each result set in the returned array is exactly the same as
    # the result set returned from {#query}.
    #
    # Note that the batch query request itself almost always succeeds,
    # unless there's a network error, blocking index rotation in
    # progress, or another general failure which prevents the whole
    # request from being processed.
    #
    # However, individual queries within the batch might very well
    # fail. In this case their respective result sets will contain a
    # non-empty "error" message, but no matches or query statistics.
    # In the extreme case all queries within the batch could fail.
    # There still will be no general error reported, because the API
    # was able to successfully connect to searchd, submit the batch,
    # and receive the results; however, every result set will have a
    # specific error message.
    #
    # @return [Array<Hash>] an +Array+ of +Hash+es which are exactly
    #   the same as the result set returned from {#query}.
    #
    # @example
    #   sphinx.add_query('some search text', '*', 'search page')
    #   results = sphinx.run_queries
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-runqueries Section 6.6.3, "RunQueries"
    # @see #add_query
    #
    def run_queries
      logger.debug { "[sphinx] run_queries(#{@reqs.length} queries)" } if logger
      if @reqs.empty?
        @error = 'No queries defined, issue add_query() first'
        return false
      end

      reqs, nreqs = @reqs.join(''), @reqs.length
      @reqs = []
      response = perform_request(:search, reqs, nreqs)

      # parse response
      (1..nreqs).map do
        result = HashWithIndifferentAccess.new('error' => '', 'warning' => '')

        # extract status
        status = result['status'] = response.get_int
        if status != SEARCHD_OK
          message = response.get_string
          if status == SEARCHD_WARNING
            result['warning'] = message
          else
            result['error'] = message
            next result
          end
        end

        # read schema
        nfields = response.get_int
        result['fields'] = (1..nfields).map { response.get_string }

        attrs_names_in_order = []
        nattrs = response.get_int
        attrs = (1..nattrs).inject({}) do |hash, idx|
          name, type = response.get_string, response.get_int
          hash[name] = type
          attrs_names_in_order << name
          hash
        end
        result['attrs'] = attrs

        # read match count
        count, id64 = response.get_ints(2)

        # read matches
        result['matches'] = (1..count).map do
          doc, weight = if id64 == 0
            response.get_ints(2)
          else
            [response.get_int64, response.get_int]
          end

          # This is a single result put in the result['matches'] array
          match = { 'id' => doc, 'weight' => weight }
          match['attrs'] = attrs_names_in_order.inject({}) do |hash, name|
            hash[name] = case attrs[name]
              when SPH_ATTR_BIGINT
                # handle 64-bit ints
                response.get_int64
              when SPH_ATTR_FLOAT
                # handle floats
                response.get_float
              when SPH_ATTR_STRING
                response.get_string
              else
                # handle everything else as unsigned ints
                val = response.get_int
                if (attrs[name] & SPH_ATTR_MULTI) != 0
                  (1..val).map { response.get_int }
                else
                  val
                end
            end
            hash
          end
          match
        end
        result['total'], result['total_found'], msecs = response.get_ints(3)
        result['time'] = '%.3f' % (msecs / 1000.0)

        nwords = response.get_int
        result['words'] = (1..nwords).inject({}) do |hash, idx|
          word = response.get_string
          docs, hits = response.get_ints(2)
          hash[word] = { 'docs' => docs, 'hits' => hits }
          hash
        end

        result
      end
    end
    alias :RunQueries :run_queries

    #=================================================================
    # Additional functionality
    #=================================================================

    # Excerpts (snippets) builder function. Connects to searchd, asks
    # it to generate excerpts (snippets) from the given documents, and
    # returns the results.
    #
    # +docs+ is a plain array of strings that carry the documents'
    # contents. +index+ is an index name string. Different settings
    # (such as charset, morphology, wordforms) from the given index will
    # be used. +words+ is a string that contains the keywords to
    # highlight. They will be processed with respect to index settings.
    # For instance, if English stemming is enabled in the index,
    # "shoes" will be highlighted even if the keyword is "shoe". Starting
    # with version 0.9.9-rc1, keywords can contain wildcards that
    # work similarly to the star-syntax available in queries.
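    #
    # As a brief sketch, highlighting matches with HTML tags might look
    # like this (+docs+ is assumed to hold document strings; the option
    # values are arbitrary, see the option list below for defaults):
    #
    #   sphinx.build_excerpts(docs, 'index', 'test',
    #     'before_match' => '<b>',
    #     'after_match'  => '</b>',
    #     'around'       => 10
    #   )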
    #
    # @param [Array] docs an array of strings which represent
    #   the documents' contents.
    # @param [String] index an index whose settings will be used for
    #   stemming, lexing and case folding.
    # @param [String] words a string which contains the words to highlight.
    # @param [Hash] opts a +Hash+ which contains additional optional
    #   highlighting parameters.
    # @option opts [String] 'before_match' ("") a string to insert before a
    #   keyword match.
    # @option opts [String] 'after_match' ("") a string to insert after a
    #   keyword match.
    # @option opts [String] 'chunk_separator' (" ... ") a string to insert
    #   between snippet chunks (passages).
    # @option opts [Integer] 'limit' (256) maximum snippet size, in symbols
    #   (codepoints).
    # @option opts [Integer] 'around' (5) how many words to pick around
    #   each matching keyword block.
    # @option opts [Boolean] 'exact_phrase' (false) whether to highlight exact
    #   query phrase matches only instead of individual keywords.
    # @option opts [Boolean] 'single_passage' (false) whether to extract the
    #   single best passage only.
    # @option opts [Boolean] 'use_boundaries' (false) whether to extract
    #   passages by phrase boundaries set up in the tokenizer.
    # @option opts [Boolean] 'weight_order' (false) whether to sort the
    #   extracted passages in order of relevance (decreasing weight),
    #   or in order of appearance in the document (increasing position).
    # @return [Array<String>, false] a plain array of strings with
    #   excerpts (snippets) on success; otherwise, +false+.
    #
    # @raise [ArgumentError] raised when parameters are invalid.
    #
    # @example
    #   sphinx.build_excerpts(['hello world', 'hello me'], 'idx', 'hello')
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-buildexcerpts Section 6.7.1, "BuildExcerpts"
    #
    def build_excerpts(docs, index, words, opts = {})
      raise ArgumentError, '"docs" argument must be Array'   unless docs.kind_of?(Array)
      raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
      raise ArgumentError, '"words" argument must be String' unless words.kind_of?(String)
      raise ArgumentError, '"opts" argument must be Hash'    unless opts.kind_of?(Hash)

      docs.each do |doc|
        raise ArgumentError, '"docs" argument must be Array of Strings' unless doc.kind_of?(String)
      end

      # fixup options
      opts = HashWithIndifferentAccess.new(
        'before_match'    => '',
        'after_match'     => '',
        'chunk_separator' => ' ... ',
        'limit'           => 256,
        'around'          => 5,
        'exact_phrase'    => false,
        'single_passage'  => false,
        'use_boundaries'  => false,
        'weight_order'    => false,
        'query_mode'      => false
      ).update(opts)

      # build request

      # v.1.0 req
      flags = 1
      flags |= 2  if opts['exact_phrase']
      flags |= 4  if opts['single_passage']
      flags |= 8  if opts['use_boundaries']
      flags |= 16 if opts['weight_order']
      flags |= 32 if opts['query_mode']

      request = Request.new
      request.put_int 0, flags # mode=0, flags as built above
      # req index
      request.put_string index.to_s
      # req words
      request.put_string words

      # options
      request.put_string opts['before_match']
      request.put_string opts['after_match']
      request.put_string opts['chunk_separator']
      request.put_int opts['limit'].to_i, opts['around'].to_i

      # documents
      request.put_int docs.size
      request.put_string(*docs)

      response = perform_request(:excerpt, request)

      # parse response
      docs.map { response.get_string }
    end
    alias :BuildExcerpts :build_excerpts

    # Extracts keywords from the query using tokenizer settings for the
    # given index, optionally with per-keyword occurrence statistics.
    # Returns an array of hashes with per-keyword information.
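    #
    # For illustration, a two-keyword query might produce an array like
    # the one below (the keywords and counts are made up for this sketch):
    #
    #   [
    #     { 'tokenized' => 'hello',  'normalized' => 'hello', 'docs' => 10, 'hits' => 20 },
    #     { 'tokenized' => 'worlds', 'normalized' => 'world', 'docs' => 5,  'hits' => 7 }
    #   ]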
    #
    # +query+ is a query to extract keywords from. +index+ is a name of
    # the index to get tokenizing settings and keyword occurrence
    # statistics from. +hits+ is a boolean flag that indicates whether
    # keyword occurrence statistics are required.
    #
    # The result set consists of +Hash+es with the following keys and values:
    #
    # 'tokenized'::
    #   Tokenized keyword.
    # 'normalized'::
    #   Normalized keyword.
    # 'docs'::
    #   A number of documents where the keyword is found (if the +hits+ param is +true+).
    # 'hits'::
    #   A number of keyword occurrences among all documents (if the +hits+ param is +true+).
    #
    # @param [String] query a query string.
    # @param [String] index an index to get tokenizing settings and
    #   keyword occurrence statistics from.
    # @param [Boolean] hits indicates whether keyword occurrence
    #   statistics are required.
    # @return [Array<Hash>] an +Array+ of +Hash+es in the format specified
    #   above.
    #
    # @raise [ArgumentError] raised when parameters are invalid.
    #
    # @example
    #   keywords = sphinx.build_keywords("this.is.my query", "test1", false)
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-buildkeywords Section 6.7.3, "BuildKeywords"
    #
    def build_keywords(query, index, hits)
      raise ArgumentError, '"query" argument must be String' unless query.kind_of?(String)
      raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
      raise ArgumentError, '"hits" argument must be Boolean' unless hits.kind_of?(TrueClass) or hits.kind_of?(FalseClass)

      # build request
      request = Request.new
      # v.1.0 req
      request.put_string query # req query
      request.put_string index # req index
      request.put_int hits ? 1 : 0

      response = perform_request(:keywords, request)

      # parse response
      nwords = response.get_int
      (0...nwords).map do
        tokenized = response.get_string
        normalized = response.get_string

        entry = HashWithIndifferentAccess.new('tokenized' => tokenized, 'normalized' => normalized)
        entry['docs'], entry['hits'] = response.get_ints(2) if hits

        entry
      end
    end
    alias :BuildKeywords :build_keywords

    # Instantly updates given attribute values in given documents.
    # Returns the number of actually updated documents (0 or more) on
    # success, or -1 on failure.
    #
    # +index+ is a name of the index (or indexes) to be updated.
    # +attrs+ is a plain array with string attribute names, listing
    # attributes that are updated. +values+ is a Hash where the key is
    # a document ID, and the value is a plain array of new attribute values.
    #
    # +index+ can be either a single index name or a list, like in
    # {#query}. Unlike {#query}, the wildcard is not allowed and all the
    # indexes to update must be specified explicitly. The list of
    # indexes can include distributed index names. Updates on
    # distributed indexes will be pushed to all agents.
    #
    # The updates only work with the docinfo=extern storage strategy.
    # They are very fast because they work fully in RAM, but
    # they can also be made persistent: updates are saved to disk
    # on a clean searchd shutdown initiated by a SIGTERM signal. With
    # additional restrictions, updates are also possible on MVA
    # attributes; refer to the mva_updates_pool directive for details.
    #
    # The first sample statement below will update document 1 in index
    # "test1", setting "group_id" to 456. The second one will update
    # documents 1001, 1002 and 1003 in index "products". For document
    # 1001, the new price will be set to 123 and the new amount in
    # stock to 5; for document 1002, the new price will be 37 and the
    # new amount will be 11; etc.
    # The third one updates document 1 in index "test2", setting MVA
    # attribute "group_id" to [456, 789].
    #
    # @example
    #   sphinx.update_attributes("test1", ["group_id"], { 1 => [456] });
    #   sphinx.update_attributes("products", ["price", "amount_in_stock"],
    #     { 1001 => [123, 5], 1002 => [37, 11], 1003 => [25, 129] });
    #   sphinx.update_attributes('test2', ['group_id'], { 1 => [[456, 789]] }, true)
    #
    # @param [String] index a name of the index to be updated.
    # @param [Array] attrs an array of attribute name strings.
    # @param [Hash] values a hash where the key is a document id, and
    #   the value is an array of new attribute values.
    # @param [Boolean] mva indicating whether to update MVA.
    # @return [Integer] number of actually updated documents (0 or more) on success,
    #   -1 on failure.
    #
    # @raise [ArgumentError] raised when parameters are invalid.
    #
    # @see http://www.sphinxsearch.com/docs/current.html#api-func-updateatttributes Section 6.7.2, "UpdateAttributes"
    #
    def update_attributes(index, attrs, values, mva = false)
      # verify everything
      raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
      raise ArgumentError, '"mva" argument must be Boolean'  unless mva.kind_of?(TrueClass) or mva.kind_of?(FalseClass)

      raise ArgumentError, '"attrs" argument must be Array' unless attrs.kind_of?(Array)
      attrs.each do |attr|
        raise ArgumentError, '"attrs" argument must be Array of Strings' unless attr.kind_of?(String) or attr.kind_of?(Symbol)
      end

      raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)
      values.each do |id, entry|
        raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless id.kind_of?(Integer)
        raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless entry.kind_of?(Array)
        raise ArgumentError, "\"values\" argument Hash values Array must have #{attrs.length} elements" unless entry.length == attrs.length
        entry.each do |v|
          if mva
            raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays' unless v.kind_of?(Array)
            v.each do |vv|
              raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays of Integers' unless vv.kind_of?(Integer)
            end
          else
            raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Integers' unless v.kind_of?(Integer)
          end
        end
      end

      # build request
      request = Request.new
      request.put_string index

      request.put_int attrs.length
      attrs.each do |attr|
        request.put_string attr
        request.put_int mva ? 1 : 0
      end

      request.put_int values.length
      values.each do |id, entry|
        request.put_int64 id
        if mva
          entry.each { |v| request.put_int_array v }
        else
          request.put_int(*entry)
        end
      end

      response = perform_request(:update, request)

      # parse response
      response.get_int
    end
    alias :UpdateAttributes :update_attributes

    # Escapes characters that are treated as special operators by the
    # query language parser.
    #
    # This function might seem redundant because it's trivial to
    # implement in any calling application. However, as the set of
    # special characters might change over time, it makes sense to
    # have an API call that is guaranteed to escape all such
    # characters at all times.
    #
    # @param [String] string a string to escape.
    # @return [String] an escaped string.
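    #
    # As a sketch, escaping a string containing special operators yields
    # each such character prefixed with a backslash (the sample string
    # below is arbitrary):
    #
    #   sphinx.escape_string('hello (world)!')
    #   # => "hello \\(world\\)\\!"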
    #
    # @example
    #   escaped = sphinx.escape_string "escaping-sample@query/string"
    #
    def escape_string(string)
      string.to_s.gsub(/([\\()|\-!@~"&\/\^\$=])/, '\\\\\\1')
    end
    alias :EscapeString :escape_string

    # Queries searchd status, and returns an array of status variable name
    # and value pairs.
    #
    # @return [Array<Array>, Array<Hash>] a table containing searchd status
    #   information. If more than one server is configured ({#set_servers}),
    #   an +Array+ of +Hash+es will be returned, one for each server. Each
    #   Hash will contain a :server element with the string name of the
    #   server (host:port) and a :status table just like the one for a
    #   single server. In case of any error, it will be stored in the
    #   :error key.
    #
    # @example Single server
    #   status = sphinx.status
    #   puts status.map { |key, value| "#{key.rjust(20)}: #{value}" }
    #
    # @example Multiple servers
    #   sphinx.set_servers([
    #     { :host => 'localhost' },
    #     { :host => 'browse02.local' }
    #   ])
    #   sphinx.status.each do |report|
    #     puts "=== #{report[:server]}"
    #     if report[:error]
    #       puts "Error: #{report[:error]}"
    #     else
    #       puts report[:status].map { |key, value| "#{key.rjust(20)}: #{value}" }
    #     end
    #   end
    #
    def status
      request = Request.new
      request.put_int(1)

      results = @servers.map do |server|
        begin
          response = perform_request(:status, request, nil, server)

          # parse response
          rows, cols = response.get_ints(2)
          status = (0...rows).map do
            (0...cols).map { response.get_string }
          end
          HashWithIndifferentAccess.new(:server => server.to_s, :status => status)
        rescue SphinxError
          # Re-raise the error when a single server is configured
          raise if @servers.size == 1
          HashWithIndifferentAccess.new(:server => server.to_s, :error => self.last_error)
        end
      end

      @servers.size > 1 ? results : results.first[:status]
    end
    alias :Status :status

    # Forces attribute flush, and blocks until it completes.
    #
    # @return [Integer] current internal flush tag on success, -1 on failure.
    #
    # @example
    #   sphinx.flush_attrs
    #
    def flush_attrs
      request = Request.new
      response = perform_request(:flushattrs, request)

      # parse response
      begin
        response.get_int
      rescue EOFError
        -1
      end
    end
    alias :FlushAttrs :flush_attrs

    #=================================================================
    # Persistent connections
    #=================================================================

    # Opens a persistent connection to the server.
    #
    # This method can only be used when a single searchd server is
    # configured.
    #
    # @return [Boolean] +true+ when a persistent connection has been
    #   established; otherwise, +false+.
    #
    # @example
    #   begin
    #     sphinx.open
    #     # perform several requests
    #   ensure
    #     sphinx.close
    #   end
    #
    # @see #close
    #
    def open
      if @servers.size > 1
        @error = 'too many servers. persistent socket allowed only for a single server.'
        return false
      end

      if @servers.first.persistent?
        @error = 'already connected'
        return false
      end

      request = Request.new
      request.put_int(1)

      perform_request(:persist, request, nil) do |server, socket|
        server.make_persistent!(socket)
      end

      true
    end
    alias :Open :open

    # Closes a previously opened persistent connection.
    #
    # This method can only be used when a single searchd server is
    # configured.
    #
    # @return [Boolean] +true+ when the persistent connection has been
    #   closed; otherwise, +false+.
    #
    # @example
    #   begin
    #     sphinx.open
    #     # perform several requests
    #   ensure
    #     sphinx.close
    #   end
    #
    # @see #open
    #
    def close
      if @servers.size > 1
        @error = 'too many servers. persistent socket allowed only for a single server.'
        return false
      end

      unless @servers.first.persistent?
        @error = 'not connected'
        return false
      end

      @servers.first.close_persistent!
    end
    alias :Close :close

    protected

    # Connect, send query, get response.
    #
    # Use this method to communicate with the Sphinx server. It ensures the
    # connection will be instantiated properly, all headers will be
    # generated properly, etc.
    #
    # @param [Symbol, String] command searchd command to perform (:search, :excerpt,
    #   :update, :keywords, :persist, :status, :query, :flushattrs;
    #   see SEARCHD_COMMAND_* for details).
    # @param [Sphinx::Request] request contains the request body.
    # @param [Integer] additional additional integer data to be placed between header and body.
    # @param [Sphinx::Server] server a server to perform the request on. This is a special
    #   parameter for internal use. If specified, the request will be performed
    #   on the specified server, and connection to this server will be
    #   attempted only once.
    #
    # @yield if a block is given, the response will not be parsed; the plain
    #   socket will be yielded instead. This is a special mode used for
    #   persistent connections; do not use it for other tasks.
    # @yieldparam [Sphinx::Server] server a server where the request was performed on.
    # @yieldparam [Sphinx::BufferedIO] socket a socket used to perform the request.
    # @return [Sphinx::Response] contains the response body.
    #
    # @see #parse_response
    #
    def perform_request(command, request, additional = nil, server = nil)
      if server
        attempts = 1
      else
        server = case request
          when String
            Zlib.crc32(request)
          when Request
            request.crc32
          else
            raise ArgumentError, "request argument must be String or Sphinx::Request"
        end
        attempts = nil
      end

      with_server(server, attempts) do |server|
        logger.info { "[sphinx] #{command} on server #{server}" } if logger

        cmd = command.to_s.upcase
        command_id  = Sphinx::Client.const_get("SEARCHD_COMMAND_#{cmd}")
        command_ver = Sphinx::Client.const_get("VER_COMMAND_#{cmd}")

        with_socket(server) do |socket|
          len = request.to_s.length + (additional.nil? ? 0 : 4)
          header = [command_id, command_ver, len].pack('nnN')
          header << [additional].pack('N') unless additional.nil?

          socket.write(header + request.to_s)

          if block_given?
            yield server, socket
          else
            parse_response(socket, command_ver)
          end
        end
      end
    end

    # This is an internal method which gets and parses a response packet from
    # the searchd server.
    #
    # Several exceptions can be raised by this method; see the @raise tags
    # below.
    #
    # @param [Sphinx::BufferedIO] socket an input stream object.
    # @param [Integer] client_version a command version which the client supports.
    # @return [Sphinx::Response] could be used for context-based
    #   parsing of the reply from the server.
    #
    # @raise [SystemCallError, SocketError] should be handled by the caller (see {#with_socket}).
    # @raise [SphinxResponseError] incomplete reply from searchd.
    # @raise [SphinxInternalError] searchd internal error.
    # @raise [SphinxTemporaryError] searchd temporary error.
    # @raise [SphinxUnknownError] searchd unknown error.
    #
    # @see #with_socket
    # @private
    #
    def parse_response(socket, client_version)
      response = ''
      status = ver = len = 0

      # Read the server reply. All exceptions are handled by {#with_socket}.
      header = socket.read(8)
      if header.length == 8
        status, ver, len = header.unpack('n2N')
        response = socket.read(len) if len > 0
      end

      # check response
      read = response.length
      if response.empty? or read != len.to_i
        error = len > 0 \
"failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" \ : 'received zero-sized searchd response' raise SphinxResponseError, error end # check status if (status == SEARCHD_WARNING) wlen = response[0, 4].unpack('N*').first @warning = response[4, wlen] return response[4 + wlen, response.length - 4 - wlen] end if status == SEARCHD_ERROR error = 'searchd error: ' + response[4, response.length - 4] raise SphinxInternalError, error end if status == SEARCHD_RETRY error = 'temporary searchd error: ' + response[4, response.length - 4] raise SphinxTemporaryError, error end unless status == SEARCHD_OK error = "unknown status code: '#{status}'" raise SphinxUnknownError, error end # check version if ver < client_version @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " + "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work" end Response.new(response) end # This is internal method which selects next server (round-robin) # and yields it to the block passed. # # In case of connection error, it will try next server several times # (see {#set_connect_timeout} method details). If all servers are down, # it will set error attribute (could be retrieved with {#last_error} # method) with the last exception message, and {#connect_error?} # method will return true. Also, {SphinxConnectError} exception # will be raised. # # @overload with_server(server_index) # Get the server based on some seed value (usually CRC32 of # request. In this case initial server will be choosed using # this seed value, in case of connetion failure next server # in servers list will be used). # @param [Integer] server_index server index, must be any # integer value (not necessarily less than number of servers.) # @param [Integer] attempts how many retries to perform. Use # +nil+ to perform retries configured with {#set_connect_timeout}. # @overload with_server(server) # Get the server specified as a parameter. If specified, request # will be performed on specified server, and it will try to # establish connection to this server only once. # @param [Server] server server to perform request on. # @param [Integer] attempts how many retries to perform. Use # +nil+ to perform retries configured with {#set_connect_timeout}. # # @yield a block which performs request on a given server. # @yieldparam [Sphinx::Server] server contains information # about the server to perform request on. # @raise [SphinxConnectError] on any connection error. # def with_server(server = nil, attempts = nil) case server when Server idx = @servers.index(server) || 0 s = server when Integer idx = server % @servers.size s = @servers[idx] when NilClass idx = 0 s = @servers[idx] else raise ArgumentError, 'server argument must be Integer or Sphinx::Server' end attempts ||= @retries begin yield s rescue SphinxConnectError => e logger.warn { "[sphinx] server failed: #{e.class.name}: #{e.message}" } if logger # Connection error! Do we need to try it again? attempts -= 1 if attempts > 0 logger.info { "[sphinx] connection to server #{s.inspect} DIED! Retrying operation..." } if logger # Get the next server idx = (idx + 1) % @servers.size s = @servers[idx] retry end # Re-raise original exception @error = e.message @connerror = true raise end end # This is internal method which retrieves socket for a given server, # initiates Sphinx session, and yields this socket to a block passed. 
    #
    # In case of any problems with session initiation, {SphinxConnectError}
    # will be raised, because this is part of connection establishing. See
    # the {#with_server} method details to get more information about how
    # this exception is handled.
    #
    # The socket retrieving routine is wrapped in a block with its own
    # timeout value (see {#set_connect_timeout}). This is done in
    # the {Server#get_socket} method, so check it for details.
    #
    # Request execution is wrapped in a block with another timeout
    # (see {#set_request_timeout}). This ensures no Sphinx request will
    # take an unreasonable amount of time.
    #
    # In case of any Sphinx error (incomplete reply, internal or temporary
    # error), the connection to the server will be re-established, and the
    # request will be retried (see {#set_request_timeout}). Of course, if
    # the connection could not be established, the next server will be
    # selected (see the explanation above).
    #
    # @param [Sphinx::Server] server contains information
    #   about the server to perform the request on.
    # @yield a block which will actually perform the request.
    # @yieldparam [Sphinx::BufferedIO] socket a socket used to
    #   perform the request.
    #
    # @raise [SphinxResponseError, SphinxInternalError, SphinxTemporaryError, SphinxUnknownError]
    #   on any response error.
    # @raise [SphinxConnectError] on any connection error.
    #
    def with_socket(server)
      attempts = @reqretries
      socket = nil

      begin
        s = server.get_socket do |sock|
          # Remember the socket to close it in case of emergency
          socket = sock

          # Send my version.
          # This is a subtle part: we must do it before (!) reading back from
          # searchd, because otherwise under some conditions (reported on
          # FreeBSD, for instance) the TCP stack could throttle the
          # write-write-read pattern because of Nagle.
          sock.write([1].pack('N'))
          v = sock.read(4).unpack('N*').first

          # Ouch, invalid protocol!
          if v < 1
            raise SphinxConnectError, "expected searchd protocol version 1+, got version '#{v}'"
          end
        end

        Sphinx::safe_execute(@reqtimeout) do
          yield s
        end
      rescue SocketError, SystemCallError, IOError, ::Errno::EPIPE => e
        logger.warn { "[sphinx] socket failure: #{e.message}" } if logger
        # Ouch, a communication problem; it will be treated as a connection problem.
        raise SphinxConnectError, "failed to read searchd response (msg=#{e.message})"
      rescue SphinxResponseError, SphinxInternalError, SphinxTemporaryError, SphinxUnknownError, ::Timeout::Error, EOFError => e
        # EOFError should not occur in an ideal world, because we compare the
        # response length with a value passed by Sphinx. But we want to ensure
        # the client will not fail with an unexpected error when the Sphinx
        # implementation has bugs.
        if e.kind_of?(EOFError) or e.kind_of?(::Timeout::Error)
          new_e = SphinxResponseError.new("failed to read searchd response (msg=#{e.message})")
          new_e.set_backtrace(e.backtrace)
          e = new_e
        end
        logger.warn { "[sphinx] generic failure: #{e.class.name}: #{e.message}" } if logger

        # Close the previously opened socket (in case it has actually been opened)
        server.free_socket(socket)

        # Request error! Do we need to try it again?
        attempts -= 1
        retry if attempts > 0

        # Re-raise the original exception
        @error = e.message
        raise e
      ensure
        # Close the previously opened socket on any other error
        server.free_socket(socket)
      end
    end

    # Enables the ability to skip the +set_+ prefix for methods called
    # inside a {#query} block.
    #
    # @example
    #   sphinx.query('test') do
    #     match_mode :all
    #     id_range 10, 100
    #   end
    #
    def method_missing(method_id, *arguments, &block)
      if @inside_eval and self.respond_to?("set_#{method_id}")
        self.send("set_#{method_id}", *arguments)
      else
        super
      end
    end
  end
end