elasticsearch.rb in logstash-input-elasticsearch-4.0.0

- old
+ new

@@ -10,22 +10,23 @@
 # [source,ruby]
 #     input {
 #       # Read all documents from Elasticsearch matching the given query
 #       elasticsearch {
 #         hosts => "localhost"
-#         query => '{ "query": { "match": { "statuscode": 200 } } }'
+#         query => '{ "query": { "match": { "statuscode": 200 } }, "sort": [ "_doc" ] }'
 #       }
 #     }
 #
 # This would create an Elasticsearch query with the following format:
 # [source,json]
 #     curl 'http://localhost:9200/logstash-*/_search?&scroll=1m&size=1000' -d '{
 #       "query": {
 #         "match": {
 #           "statuscode": 200
 #         }
-#       }
+#       },
+#       "sort": [ "_doc" ]
 #     }'
 #
 class LogStash::Inputs::Elasticsearch < LogStash::Inputs::Base
   config_name "elasticsearch"
 
@@ -38,32 +39,28 @@
 
   # The index or alias to search.
   config :index, :validate => :string, :default => "logstash-*"
 
   # The query to be executed. Read the Elasticsearch query DSL documentation
-  # for more info 
+  # for more info
   # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
-  config :query, :validate => :string, :default => '{"query": { "match_all": {} } }'
+  config :query, :validate => :string, :default => '{ "sort": [ "_doc" ] }'
 
-  # Enable the Elasticsearch "scan" search type.  This will disable
-  # sorting but increase speed and performance.
-  config :scan, :validate => :boolean, :default => true
-
   # This allows you to set the maximum number of hits returned per scroll.
   config :size, :validate => :number, :default => 1000
 
   # This parameter controls the keepalive time in seconds of the scrolling
   # request and initiates the scrolling process. The timeout applies per
-  # round trip (i.e. between the previous scan scroll request, to the next).
+  # round trip (i.e. between the previous scroll request, to the next).
   config :scroll, :validate => :string, :default => "1m"
 
   # If set, include Elasticsearch document information such as index, type, and
   # the id in the event.
   #
   # It might be important to note, with regards to metadata, that if you're
   # ingesting documents with the intent to re-index them (or just update them)
-  # that the `action` option in the elasticsearch output want's to know how to
+  # that the `action` option in the elasticsearch output wants to know how to
   # handle those things. It can be dynamically assigned with a field
   # added to the metadata.
   #
   # Example
   # [source, ruby]
@@ -115,12 +112,10 @@
       :body => @query,
       :scroll => @scroll,
       :size => @size
     }
 
-    @options[:search_type] = 'scan' if @scan
-
     transport_options = {}
 
     if @user && @password
       token = Base64.strict_encode64("#{@user}:#{@password.value}")
       transport_options[:headers] = { :Authorization => "Basic #{token}" }
@@ -144,17 +139,11 @@
 
   def run(output_queue)
     # get first wave of data
     r = @client.search(@options)
 
-    # since 'scan' doesn't return data on the search call, do an extra scroll
-    if @scan
-      r = process_next_scroll(output_queue, r['_scroll_id'])
-      has_hits = r['has_hits']
-    else # not a scan, process the response
-      r['hits']['hits'].each { |hit| push_hit(hit, output_queue) }
-      has_hits = r['hits']['hits'].any?
-    end
+    r['hits']['hits'].each { |hit| push_hit(hit, output_queue) }
+    has_hits = r['hits']['hits'].any?
 
     while has_hits && !stop?
       r = process_next_scroll(output_queue, r['_scroll_id'])
       has_hits = r['has_hits']
     end