#
# ElasticSearch config file
#

cluster:
  name:                         hoolock

  # http://groups.google.com/a/elasticsearch.com/group/users/browse_thread/thread/439afb06f3e85aa7/431a8543811d7848?lnk=gst&q=configuration#431a8543811d7848    
  routing:
    allocation:
      concurrent_recoveries:    1

# File paths
path:
  home:                         /usr/local/share/elasticsearch
  conf:                         /etc/elasticsearch
  logs:                         /var/log/elasticsearch
  # data:                       /mnt/elasticsearch/data
  # work:                       /mnt/elasticsearch/work
    
# http://www.elasticsearch.com/docs/elasticsearch/modules/node/
node:
  # # node.data: is this a data esnode (stores, indexes data)? default true
  data:                         true
  
# http://www.elasticsearch.com/docs/elasticsearch/modules/http/
http:
  # # http.enabled: is this a query esnode (has http interface, dispatches/gathers queries)? Default true
  enabled:                    true
  port:                       9200-9300
  max_content_length:         100mb

gateway:
  # The gateway set on the node level will automatically control the index
  # gateway to use. For example, if the fs gateway is used, then automatically,
  # each index created on the node will also use its own respective index level
  # fs gateway. In this case, if in an index should not persist its state, it
  # should be explicitly set to none.
  #
  # Set gateway.type to one of: [none, local, fs, hadoop, s3]
  #
  type:                         local
  #
  # recovery begins when recover_after_nodes are present and then either
  # recovery_after_time has passed *or* expected_nodes have shown up.
  recover_after_nodes:          24
  recovery_after_time:          10m         # 5m
  expected_nodes:               24          # 2
  #
  # # use with type: s3
  # s3:
  #   bucket:                     infochimps-search

# http://groups.google.com/a/elasticsearch.com/group/users/browse_thread/thread/1f3001f43266879a/06d62ea3ceb4db30?lnk=gst&q=translog#06d62ea3ceb4db30
indices:
  memory:
    # Increase if you are bulk loading
    # A number ('512m') or percent ('10%'). You can set limits on a percentage
    # with max_index_buffer_size and min_index_buffer_size. 10% by default.
    index_buffer_size:          512m

cache:
  memory:
    # buffer_size:              100k
    # cache_size:               50m
    # direct:                   true
    # warm_cache:               false
    
index:
  number_of_shards:             24
  number_of_replicas:           0
  translog:
    # A shard is flushed to local disk (the lucene index is committed) once this
    # number of operations accumulate in the translog. defaults to 5000
    #
    # If you have 
    flush_threshold:            200000     # 5000
  merge:
    policy:
      # Determines how often segment indices are merged by index operation. With
      # smaller values, less RAM is used while indexing, and searches on
      # unoptimized indices are faster, but indexing speed is slower. With
      # larger values, more RAM is used during indexing, and while searches on
      # unoptimized indices are slower, indexing is faster. Thus larger values
      # (greater than 10) are best for batch index creation, and smaller values
      # (lower than 10) for indices that are interactively maintained. Defaults
      # to 10.
      merge_factor:             30
      # Use the compound file format. If not set, controlled by the actually
      # store used, this is because the compound format was created to reduce
      # the number of open file handles when using file based storage. The file
      # system based ones default to true which others default to false. Even
      # with file system based ones, consider increasing the number of open file
      # handles and setting this to false for better performance
      use_compound_file:        false
      # A size setting type which sets the minimum size for the lowest level
      # segments. Any segments below this size are considered to be on the same
      # level (even if they vary drastically in size) and will be merged
      # whenever there are mergeFactor of them. This effectively truncates the
      # “long tail” of small segments that would otherwise be created into a
      # single level. If you set this too large, it could greatly increase the
      # merging cost during indexing (if you flush many small
      # segments). Defaults to 1.6mb
      min_merge_size:           2.7mb
      # Largest segment (by total byte size) that may be merged with other
      # segments. Defaults to unbounded.
      # max_merge_size:
      # Largest segment (by document count) that may be merged with other
      # segments. Defaults to unbounded
      # max_merge_docs
    scheduler:
      max_thread_count:         64
  # deletionpolicy:             keep_only_last

  engine:
    robin:
      # How often to schedule the refresh operation (the same one the Refresh
      # API, which enables near real time search).  Default '1s'; set to -1 to
      # disable automatic refresh (you must instead initiate refresh via API)
      refresh_interval:         -1
      # Set the interval between indexed terms. Large values cause less memory
      # to be used by a reader / searcher, but slow random-access to
      # terms. Small values cause more memory to be used by a reader / searcher,
      # and speed random-access to terms. Defaults to 128.
      term_index_interval:      1024

  gateway:
    # The index.gateway.snapshot_interval is a time setting allowing to
    # configure the interval at which snapshotting of the index shard to the
    # gateway will take place. Note, only primary shards start this scheduled
    # snapshotting process. It defaults to 10s, and can be disabled by setting
    # it to -1.
    snapshot_interval:          -1
    # When a primary shard is shut down explicitly (not relocated), the
    # index.gateway.snapshot_on_close flag can control if while shutting down, a
    # gateway snapshot should be performed. It defaults to true.
    snapshot_on_close:          true

# http://www.elasticsearch.com/docs/elasticsearch/modules/node/network/      
network:
  bind_host:                    _local_
  publish_host:                 _local_
  #
  # tcp:
  #   no_delay:                 true
  #   keep_alive:               ~
  #   reuse_address             true
  #   send_buffer_size          ~
  #   receive_buffer_size:      ~

# http://www.elasticsearch.com/docs/elasticsearch/modules/transport/
transport:
  tcp:
    port:                       9300-9400
    connect_timeout:            1s
    # # enable lzf compression in esnode-esnode communication?
    compress:                   false

# http://www.elasticsearch.com/docs/elasticsearch/modules/jmx/
jmx:
  # Create an RMI connector?
  create_connector:           true
  port:                       9400-9500
  domain:                     elasticsearch

# http://www.elasticsearch.com/docs/elasticsearch/modules/threadpool/
threadpool:
  # #
  # # threadpool.type should be one of [cached, scaling, blocking]:
  # #
  # # * Cached:  An unbounded thread pool that reuses previously constructed threads.
  # # * Scaling: A bounded thread pool that reuses previously created free threads.
  # # * Blocking: A bounded thread pool that reuses previously created free
  # #   threads. Pending requests block for an available thread (different than
  # #   the scaling one, where the request is added to a queue and does not
  # #   block).
  # #
  # type:                       cached

# http://www.elasticsearch.com/docs/elasticsearch/modules/discovery/
discovery:
  # set to 'zen' or 'ec2'
  type:                         zen
  zen:
    ping:
      multicast:
        enabled:                false
      unicast:
        hosts:                  10.195.215.175:9300,10.243.57.219:9300,10.194.218.143:9300,10.204.223.175:9300,10.242.89.235:9300,10.212.226.127:9300
    # There are two fault detection processes running. The first is by the
    # master, to ping all the other nodes in the cluster and verify that they
    # are alive. And on the other end, each node pings to master to verify if
    # its still alive or an election process needs to be initiated.
    fd:
      # How often a node gets pinged. Defaults to "1s".
      ping_interval:            3s
      # How long to wait for a ping response, defaults to "30s".
      ping_timeout:             10s
      # How many ping failures / timeouts cause a node to be considered failed. Defaults to 3.
      ping_retries:             3
  #
  # # ec2 discovery can cause big trouble with the hadoop loader:
  # # discovery churn can hit API usage limits
  # # Be sure to set your cloud keys if you're using ec2
  # #
  # ec2:
  #   # security groups used for discovery
  #   groups:                     hoolock-data_esnode
  #   # require *all* (false) or *any* (true) of those groups?
  #   any_group:                  true
  #   # private_ip, public_ip, private_dns, public_dns
  #   host_type:                  private_ip
  #   availability_zones:         us-east-1d

# Necessary if you will use either of
# * the ec2 discovery module: for finding peers
# * the s3 gateway module, for pushing indices to an s3 mirror.
# Read more: http://www.elasticsearch.com/docs/elasticsearch/cloud/
# 
cloud:
  aws:
    access_key:                 <%= @aws['aws_access_key_id'] %>
    secret_key:                 <%= @aws['aws_secret_access_key'] %>

# monitor.jvm: gc_threshold, interval, enabled
# thrift:
#   # port: