Sha256: 9ecf43f7fe49f7dd305658450a205e4b8d597c75140a8bfa3bba1dcf9e06d805

Contents?: true

Size: 1.97 KB

Versions: 5

Compression:

Stored size: 1.97 KB

Contents

require 'spec_helper'

# Unit specs for Elasticrawl::JobStep: its associations, the text columns
# holding its paths, and the job flow step it builds from a jar/class config.
describe Elasticrawl::JobStep do
  # Associations and schema.
  it { should belong_to(:job) }
  it { should belong_to(:crawl_segment) }
  it { should have_db_column(:input_paths).of_type(:text) }
  it { should have_db_column(:output_path).of_type(:text) }

  describe '#job_flow_step' do
    # Fixture records: a parse job capped at 3 files and the first segment
    # of a crawl.
    let(:job) do
      Elasticrawl::ParseJob.create(job_name: '1389789645620', max_files: 3)
    end
    let(:crawl) { Elasticrawl::Crawl.create(crawl_name: 'CC-MAIN-2014-49') }
    let(:crawl_segment) { crawl.crawl_segments[0] }

    # S3 locations stored on the step under test.
    let(:input_paths) do
      's3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2014-49/segments/1368696381249/wet/*.warc.wet.gz'
    end
    let(:output_path) do
      's3://elasticrawl/data/1-parse/1389789645620/segments/1368696381249/'
    end

    # Jar location and driver class passed to #job_flow_step.
    let(:config) do
      {
        'jar' => 's3://elasticrawl/jar/elasticrawl-example-0.0.1.jar',
        'class' => 'com.rossfairbanks.commoncrawl.elasticrawl.ParserDriver'
      }
    end

    let(:job_step) do
      Elasticrawl::JobStep.create(job: job,
                                  crawl_segment: crawl_segment,
                                  input_paths: input_paths,
                                  output_path: output_path)
    end

    subject { job_step.job_flow_step(config) }

    it 'should be a CustomJarStep' do
      expect(subject).to be_a(Elasticity::CustomJarStep)
    end

    it 'should have a jar location' do
      expect(subject.jar).to eq(config['jar'])
    end

    # Expected argument order: class, input paths, output path, max files.
    it 'should have 4 jar args' do
      expect(subject.arguments.count).to eq(4)
    end

    it 'should have a class argument' do
      expect(subject.arguments[0]).to eq(config['class'])
    end

    it 'should have an input path arg' do
      expect(subject.arguments[1]).to eq(input_paths)
    end

    it 'should have an output path arg' do
      expect(subject.arguments[2]).to eq(output_path)
    end

    # The job is created with max_files 3; the argument is compared as a string.
    it 'should have a max files arg' do
      expect(subject.arguments[3]).to eq('3')
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygem

Version Path
elasticrawl-1.1.4 spec/unit/job_step_spec.rb
elasticrawl-1.1.3 spec/unit/job_step_spec.rb
elasticrawl-1.1.2 spec/unit/job_step_spec.rb
elasticrawl-1.1.1 spec/unit/job_step_spec.rb
elasticrawl-1.1.0 spec/unit/job_step_spec.rb