#
# ElasticSearch config file
#
# This file is an ERB template: the `<%= ... %>` tags in the `cloud` section
# are substituted before ElasticSearch reads the rendered YAML.
#

cluster:
  name: hoolock

  # http://groups.google.com/a/elasticsearch.com/group/users/browse_thread/thread/439afb06f3e85aa7/431a8543811d7848?lnk=gst&q=configuration#431a8543811d7848
  routing:
    allocation:
      concurrent_recoveries: 1

# File paths
path:
  home: /usr/local/share/elasticsearch
  conf: /etc/elasticsearch
  logs: /var/log/elasticsearch
  # data: /mnt/elasticsearch/data
  # work: /mnt/elasticsearch/work

# http://www.elasticsearch.com/docs/elasticsearch/modules/node/
node:
  # node.data: is this a data esnode (stores, indexes data)? Default true
  data: true

# http://www.elasticsearch.com/docs/elasticsearch/modules/http/
http:
  # http.enabled: is this a query esnode (has http interface,
  # dispatches/gathers queries)? Default true
  enabled: true
  port: 9200-9300
  max_content_length: 100mb

gateway:
  # The gateway set on the node level will automatically control the index
  # gateway to use. For example, if the fs gateway is used, then automatically,
  # each index created on the node will also use its own respective index level
  # fs gateway. In this case, if an index should not persist its state, it
  # should be explicitly set to none.
  #
  # Set gateway.type to one of: [none, local, fs, hadoop, s3]
  #
  type: local
  #
  # Recovery begins when recover_after_nodes are present and then either
  # recover_after_time has passed *or* expected_nodes have shown up.
  #
  # NOTE: the setting is spelled `recover_after_time` (like
  # `recover_after_nodes`); a key named `recovery_after_time` is not read by
  # ElasticSearch and would be silently ignored.
  recover_after_nodes: 24
  recover_after_time: 10m  # 5m
  expected_nodes: 24  # 2
  #
  # # use with type: s3
  # s3:
  #   bucket: infochimps-search

# http://groups.google.com/a/elasticsearch.com/group/users/browse_thread/thread/1f3001f43266879a/06d62ea3ceb4db30?lnk=gst&q=translog#06d62ea3ceb4db30
indices:
  memory:
    # Increase if you are bulk loading.
    # A number ('512m') or percent ('10%'). You can set limits on a percentage
    # with max_index_buffer_size and min_index_buffer_size. 10% by default.
    index_buffer_size: 512m

cache:
  memory:
    # buffer_size: 100k
    # cache_size: 50m
    # direct: true
    # warm_cache: false

index:
  number_of_shards: 24
  number_of_replicas: 0
  translog:
    # A shard is flushed to local disk (the lucene index is committed) once
    # this number of operations accumulate in the translog. Defaults to 5000.
    flush_threshold: 200000  # 5000
  merge:
    policy:
      # Determines how often segment indices are merged by index operation.
      # With smaller values, less RAM is used while indexing, and searches on
      # unoptimized indices are faster, but indexing speed is slower. With
      # larger values, more RAM is used during indexing, and while searches on
      # unoptimized indices are slower, indexing is faster. Thus larger values
      # (greater than 10) are best for batch index creation, and smaller values
      # (lower than 10) for indices that are interactively maintained.
      # Defaults to 10.
      merge_factor: 30
      # Use the compound file format. If not set, controlled by the actual
      # store used; this is because the compound format was created to reduce
      # the number of open file handles when using file based storage. The
      # file system based ones default to true while others default to false.
      # Even with file system based ones, consider increasing the number of
      # open file handles and setting this to false for better performance.
      use_compound_file: false
      # A size setting type which sets the minimum size for the lowest level
      # segments. Any segments below this size are considered to be on the
      # same level (even if they vary drastically in size) and will be merged
      # whenever there are mergeFactor of them. This effectively truncates the
      # "long tail" of small segments that would otherwise be created into a
      # single level. If you set this too large, it could greatly increase the
      # merging cost during indexing (if you flush many small segments).
      # Defaults to 1.6mb
      min_merge_size: 2.7mb
      # Largest segment (by total byte size) that may be merged with other
      # segments. Defaults to unbounded.
      # max_merge_size:
      # Largest segment (by document count) that may be merged with other
      # segments. Defaults to unbounded.
      # max_merge_docs:
    scheduler:
      max_thread_count: 64
  # deletionpolicy: keep_only_last
  engine:
    robin:
      # How often to schedule the refresh operation (the same one the Refresh
      # API, which enables near real time search). Default '1s'; set to -1 to
      # disable automatic refresh (you must instead initiate refresh via API).
      refresh_interval: -1
      # Set the interval between indexed terms. Large values cause less memory
      # to be used by a reader / searcher, but slow random-access to terms.
      # Small values cause more memory to be used by a reader / searcher, and
      # speed random-access to terms. Defaults to 128.
      term_index_interval: 1024
  gateway:
    # The index.gateway.snapshot_interval is a time setting allowing to
    # configure the interval at which snapshotting of the index shard to the
    # gateway will take place. Note, only primary shards start this scheduled
    # snapshotting process. It defaults to 10s, and can be disabled by setting
    # it to -1.
    snapshot_interval: -1
    # When a primary shard is shut down explicitly (not relocated), the
    # index.gateway.snapshot_on_close flag can control if, while shutting
    # down, a gateway snapshot should be performed. It defaults to true.
    snapshot_on_close: true

# http://www.elasticsearch.com/docs/elasticsearch/modules/node/network/
network:
  bind_host: _local_
  publish_host: _local_
  #
  # tcp:
  #   no_delay: true
  #   keep_alive: ~
  #   reuse_address: true
  #   send_buffer_size: ~
  #   receive_buffer_size: ~

# http://www.elasticsearch.com/docs/elasticsearch/modules/transport/
transport:
  tcp:
    port: 9300-9400
    connect_timeout: 1s
    # enable lzf compression in esnode-esnode communication?
    compress: false

# http://www.elasticsearch.com/docs/elasticsearch/modules/jmx/
jmx:
  # Create an RMI connector?
  create_connector: true
  port: 9400-9500
  domain: elasticsearch

# http://www.elasticsearch.com/docs/elasticsearch/modules/threadpool/
threadpool:
  # threadpool.type should be one of [cached, scaling, blocking]:
  #
  # * Cached: An unbounded thread pool that reuses previously constructed
  #   threads.
  # * Scaling: A bounded thread pool that reuses previously created free
  #   threads.
  # * Blocking: A bounded thread pool that reuses previously created free
  #   threads. Pending requests block for an available thread (different than
  #   the scaling one, where the request is added to a queue and does not
  #   block).
  #
  type: cached

# http://www.elasticsearch.com/docs/elasticsearch/modules/discovery/
discovery:
  # set to 'zen' or 'ec2'
  type: zen
  zen:
    ping:
      multicast:
        enabled: false
      unicast:
        hosts: 10.195.215.175:9300,10.243.57.219:9300,10.194.218.143:9300,10.204.223.175:9300,10.242.89.235:9300,10.212.226.127:9300
    # There are two fault detection processes running. The first is by the
    # master, to ping all the other nodes in the cluster and verify that they
    # are alive. And on the other end, each node pings the master to verify if
    # it's still alive or an election process needs to be initiated.
    fd:
      # How often a node gets pinged. Defaults to "1s".
      ping_interval: 3s
      # How long to wait for a ping response, defaults to "30s".
      ping_timeout: 10s
      # How many ping failures / timeouts cause a node to be considered
      # failed. Defaults to 3.
      ping_retries: 3
  #
  # # ec2 discovery can cause big trouble with the hadoop loader:
  # # discovery churn can hit API usage limits
  # # Be sure to set your cloud keys if you're using ec2
  # #
  # ec2:
  #   # security groups used for discovery
  #   groups: hoolock-data_esnode
  #   # require *all* (false) or *any* (true) of those groups?
  #   any_group: true
  #   # private_ip, public_ip, private_dns, public_dns
  #   host_type: private_ip
  #   availability_zones: us-east-1d

# Necessary if you will use either of
# * the ec2 discovery module: for finding peers
# * the s3 gateway module, for pushing indices to an s3 mirror.
# Read more: http://www.elasticsearch.com/docs/elasticsearch/cloud/
#
cloud:
  aws:
    # Quoted so the rendered credentials are always read as strings, even if
    # the substituted value is empty or starts with a YAML indicator character.
    access_key: "<%= @aws['aws_access_key_id'] %>"
    secret_key: "<%= @aws['aws_secret_access_key'] %>"

# monitor.jvm: gc_threshold, interval, enabled
# thrift:
#   # port: