<%
  # Template header variables. Presumably read by Rubber as the destination
  # path of the rendered file and a shell command to run after rendering --
  # confirm against the Rubber documentation.
  @path = "#{rubber_env.cassandra_dir}/conf/cassandra.yaml"
  @post = "mkdir -p #{rubber_env.cassandra_data_dir} #{rubber_env.cassandra_commitlog_dir} #{rubber_env.cassandra_log_dir}"
%>
# Cassandra storage config YAML

# See http://wiki.apache.org/cassandra/StorageConfiguration for
# explanations of configuration directives.

# name of the cluster
cluster_name: '<%= rubber_env.app_name %>_<%= Rubber.env %>_cluster'

# Set to true to make new [non-seed] nodes automatically migrate the
# right data to themselves.
# Rendered as true only when more than one instance has the "cassandra" role.
auto_bootstrap: <%= rubber_instances.for_role("cassandra").size > 1 ? "true" : "false" %>

# authentication backend, implementing IAuthenticator; used to limit
# keyspace access
authenticator: org.apache.cassandra.auth.AllowAllAuthenticator

# any IPartitioner may be used, including your own as long as it is on
# the classpath. Out of the box, Cassandra provides
# org.apache.cassandra.dht.RandomPartitioner
# org.apache.cassandra.dht.OrderPreservingPartitioner, and
# org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
partitioner: org.apache.cassandra.dht.RandomPartitioner

# directories where Cassandra should store data on disk.
data_file_directories:
  - <%= rubber_env.cassandra_data_dir %>

# Addresses of hosts that are deemed contact points.
# Cassandra nodes use this list of hosts to find each other and learn
# the topology of the ring. You must change this if you are running
# multiple nodes!
<%
  # Prefer instances carrying the cassandra_seed role; when none do, fall
  # back to every instance with the cassandra role.
  seeds = rubber_instances.for_role('cassandra_seed')
  seeds = rubber_instances.for_role('cassandra') if seeds.size == 0
  seed_hosts = seeds.collect { |i| i.full_name }
%>
seeds:
<% seed_hosts.each do |host| %>
  - <%= host %>
<% end %>

# Access mode. mmapped i/o is substantially faster, but only practical on
# a 64bit machine (which notably does not include EC2 "small" instances)
# or relatively small datasets. "auto", the safe choice, will enable
# mmapping on a 64bit JVM.
# Other values are "mmap", "mmap_index_only"
# (which may allow you to get part of the benefits of mmap on a 32bit
# machine by mmapping only index files) and "standard".
# (The buffer size settings that follow only apply to standard,
# non-mmapped i/o.)
disk_access_mode: auto

# Unlike most systems, in Cassandra writes are faster than reads, so
# you can afford more of those in parallel. A good rule of thumb is 2
# concurrent reads per processor core. Increase concurrent_writes to
# the number of clients writing at once if you enable CommitLogSync +
# CommitLogSyncDelay.
concurrent_reads: 8
concurrent_writes: 32

# Buffer size to use when performing contiguous column slices.
# Increase this to the size of the column slices you typically perform
sliced_buffer_size_in_kb: 64

# TCP port, for commands and data
storage_port: <%= rubber_env.cassandra_storage_port %>

# Address to bind to and tell other nodes to connect to. You _must_
# change this if you want multiple nodes to be able to communicate!
listen_address: <%= rubber_env.full_host %>

# The address to bind the Thrift RPC service to
rpc_address: <%= rubber_env.full_host %>
# port for Thrift to listen on
rpc_port: <%= rubber_env.cassandra_rpc_port %>
# Whether or not to use a framed transport for Thrift.
thrift_framed_transport: false

snapshot_before_compaction: false

# The threshold size in megabytes the binary memtable must grow to,
# before it's submitted for flushing to disk.
binary_memtable_throughput_in_mb: 256
# Number of minutes to keep a memtable in memory
memtable_flush_after_mins: 60
# Size of the memtable in memory before it is dumped
memtable_throughput_in_mb: 64
# Number of objects in millions in the memtable before it is dumped
memtable_operations_in_millions: 0.3
# Buffer size to use when flushing memtables to disk.
flush_data_buffer_size_in_mb: 32
# Increase (decrease) the index buffer size relative to the data
# buffer if you have few (many) columns per key.
flush_index_buffer_size_in_mb: 8

column_index_size_in_kb: 64
row_warning_threshold_in_mb: 512

# commit log
commitlog_directory: <%= rubber_env.cassandra_commitlog_dir %>

# Size to allow commitlog to grow to before creating a new segment
commitlog_rotation_threshold_in_mb: 128

# commitlog_sync may be either "periodic" or "batch."
# When in batch mode, Cassandra won't ack writes until the commit log
# has been fsynced to disk. It will wait up to
# CommitLogSyncBatchWindowInMS milliseconds for other writes, before
# performing the sync.
commitlog_sync: periodic

# the other option is "periodic," where writes may be acked immediately
# and the CommitLog is simply synced every commitlog_sync_period_in_ms
# milliseconds.
commitlog_sync_period_in_ms: 10000

# Time to wait for a reply from other nodes before failing the command
rpc_timeout_in_ms: 10000

# time to wait before garbage collecting tombstones (deletion markers)
gc_grace_seconds: 864000

# endpoint_snitch -- Set this to a class that implements
# IEndpointSnitch, which will let Cassandra know enough
# about your network topology to route requests efficiently.
# Out of the box, Cassandra provides
# org.apache.cassandra.locator.SimpleSnitch,
# org.apache.cassandra.locator.RackInferringSnitch, and
# org.apache.cassandra.locator.PropertyFileSnitch.
endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch

# A ColumnFamily is the Cassandra concept closest to a relational table.
#
# Keyspaces are separate groups of ColumnFamilies. Except in very
# unusual circumstances you will have one Keyspace per application.
#
# Keyspace required parameters:
# - name: name of the keyspace; "system" and "definitions" are
#   reserved for Cassandra Internals.
# - replica_placement_strategy: the class that determines how replicas
#   are distributed among nodes. Must implement IReplicaPlacementStrategy.
#   Out of the box, Cassandra provides
#   org.apache.cassandra.locator.RackUnawareStrategy and
#   org.apache.cassandra.locator.RackAwareStrategy.
#   RackAwareStrategy
#   places one replica in each of two datacenters, and other replicas in
#   different racks in one.
# - replication_factor: Number of replicas of each row
# - column_families: column families associated with this keyspace
#
# ColumnFamily required parameters:
# - name: name of the ColumnFamily. Must not contain the character "-".
# - compare_with: tells Cassandra how to sort the columns for slicing
#   operations. The default is BytesType, which is a straightforward
#   lexical comparison of the bytes in each column. Other options are
#   AsciiType, UTF8Type, LexicalUUIDType, TimeUUIDType, and LongType.
#   You can also specify the fully-qualified class name to a class of
#   your choice extending org.apache.cassandra.db.marshal.AbstractType.
#
# ColumnFamily optional parameters:
# - keys_cached: specifies the number of keys per sstable whose
#   locations we keep in memory in "mostly LRU" order. (JUST the key
#   locations, NOT any column values.) Specify a fraction (value less
#   than 1) or an absolute number of keys to cache. Defaults to 200000
#   keys.
# - rows_cached: specifies the number of rows whose entire contents we
#   cache in memory. Do not use this on ColumnFamilies with large rows,
#   or ColumnFamilies with high write:read ratios. Specify a fraction
#   (value less than 1) or an absolute number of rows to cache.
#   Defaults to 0. (i.e. row caching is off by default)
# - comment: used to attach additional human-readable information about
#   the column family to its definition.
# - read_repair_chance: specifies the probability with which read
#   repairs should be invoked on non-quorum reads. Must be between 0
#   and 1. Defaults to 1.0 (always read repair).
# - preload_row_cache: If true, will populate row cache on startup.
#   Defaults to false.
#
# Keyspace definitions. A single keyspace is declared, named after the
# capitalized application name, with the sample column families below.
keyspaces:
  # Quoted so that an app name resembling a boolean or number is still
  # read as a string (cluster_name quotes the same value).
  - name: '<%= rubber_env.app_name.capitalize %>'
    replica_placement_strategy: org.apache.cassandra.locator.RackUnawareStrategy
    replication_factor: 1
    column_families:
      - name: Standard1
        compare_with: BytesType

      - name: Standard2
        compare_with: UTF8Type
        read_repair_chance: 0.1
        keys_cached: 100

      - name: StandardByUUID1
        compare_with: TimeUUIDType

      - name: Super1
        column_type: Super
        compare_with: BytesType
        compare_subcolumns_with: BytesType

      # NOTE(review): no compare_with is set here, so column names
      # presumably use the default comparator even though the comment
      # text says they are UTF8 strings -- confirm intent before changing.
      - name: Super2
        column_type: Super
        compare_subcolumns_with: UTF8Type
        preload_row_cache: true
        rows_cached: 10000
        keys_cached: 50
        comment: 'A column family with supercolumns, whose column and subcolumn names are UTF8 strings'

      - name: Super3
        column_type: Super
        compare_with: LongType
        comment: 'A column family with supercolumns, whose column names are Longs (8 bytes)'