Sha256: d3cc739a485277728e0d072789f932cc745a7a882317988818cb65bdcab7e162

Contents?: true

Size: 1.75 KB

Versions: 5

Compression:

Stored size: 1.75 KB

Contents

# Settings read by Wukong::CassandraScript when it assembles its hadoop arguments.

# Keyspace written to; declared as required.
Settings.define(:cassandra_keyspace,
                :required    => true,
                :description => "The keyspace to bulk load")

# Column family written to; declared as required.
Settings.define(:cassandra_col_family,
                :required    => true,
                :description => "The column family to bulk load")

# Root of the Cassandra install; the script looks under it for jars and the
# avro schema. Presumably overridable through the CASSANDRA_HOME environment
# variable (via :env_var) -- confirm against the Settings implementation.
Settings.define(:cassandra_home,
                :env_var => 'CASSANDRA_HOME',
                :default => '/usr/local/share/cassandra')

module Wukong
  #
  # Script subclass that appends the hadoop arguments used to send job output
  # to Cassandra through org.apache.cassandra.hadoop.ColumnFamilyOutputFormat.
  #
  # Reads Settings.cassandra_keyspace, Settings.cassandra_col_family,
  # Settings.cassandra_hosts and Settings.cassandra_home.
  #
  class CassandraScript < Wukong::Script
    #
    # Extend the superclass's hadoop arguments with the Cassandra options.
    #
    # args are forwarded unchanged to super; the collection super returns is
    # appended to in place and then returned.
    #
    def hadoop_other_args(*args)
      opts = super(*args)
      opts << "-D stream.map.output='cassandra_avro_output'"
      opts << "-D stream.io.identifier.resolver.class='org.apache.cassandra.hadoop.streaming.AvroResolver'"
      opts << "-D cassandra.output.keyspace='#{Settings.cassandra_keyspace}'"
      opts << "-D cassandra.output.columnfamily='#{Settings.cassandra_col_family}'"
      opts << "-D cassandra.partitioner.class='org.apache.cassandra.dht.RandomPartitioner'"
      opts << "-D cassandra.thrift.address='#{cassandra_thrift_address}'"
      # The port is fixed here; any port given in cassandra_hosts is discarded.
      opts << "-D cassandra.thrift.port='9160'"
      # opts << "-D mapreduce.output.columnfamilyoutputformat.batch.threshold='1024'"
      # ORDER MATTERS -- keep the -D options above and these three in this order.
      # NOTE(review): presumably because hadoop streaming wants generic options
      # ahead of command options; confirm before reordering.
      opts << "-libjars '#{cassandra_jars}'"
      opts << "-file    '#{avro_schema}'"
      opts << "-outputformat 'org.apache.cassandra.hadoop.ColumnFamilyOutputFormat'"
      opts
    end

    #
    # Return the Cassandra host names as a comma-separated string, with any
    # ":port" suffix removed from each entry.
    #
    # Settings.cassandra_hosts may be a single string or a (nested) array of
    # strings, and each string may itself hold several comma-separated hosts.
    # Entries are split on commas *before* the port is stripped: stripping
    # first would cut "a:9160,b:9160" down to just "a" and lose every host
    # after the first one.
    #
    # Raises ArgumentError when Settings.cassandra_hosts is nil.
    #
    def cassandra_thrift_address
      hosts = Settings.cassandra_hosts
      raise ArgumentError, "Settings.cassandra_hosts must be set to build cassandra.thrift.address" if hosts.nil?
      [hosts].flatten.
        map    { |entry| entry.split(',') }.flatten.
        map    { |host| host.strip.gsub(/:.*/, '') }.
        reject { |host| host.empty? }.
        join(',')
    end

    #
    # Return paths to cassandra jars as a string
    #
    # Globs three locations under Settings.cassandra_home and joins every
    # match with commas.
    #
    def cassandra_jars
      home = Settings.cassandra_home
      Dir["#{home}/build/apache-cassandra*.jar", "#{home}/build/lib/jars/*.jar", "#{home}/lib/*.jar"].join(',')
    end

    #
    # Return the path to the avro protocol file under Settings.cassandra_home.
    #
    def avro_schema
      File.join(Settings.cassandra_home, "interface/avro/cassandra.avpr")
    end

  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
wukong-3.0.0.pre old/wukong/script/cassandra_loader_script.rb
wukong-2.0.2 lib/wukong/script/cassandra_loader_script.rb
wukong-2.0.1 lib/wukong/script/cassandra_loader_script.rb
wukong-2.0.0 lib/wukong/script/cassandra_loader_script.rb
wukong-1.5.4 lib/wukong/script/cassandra_loader_script.rb