spec/unit/crawl_spec.rb in elasticrawl-1.0.0 vs spec/unit/crawl_spec.rb in elasticrawl-1.1.0
- old
+ new
@@ -3,20 +3,20 @@
describe Elasticrawl::Crawl do
it { should have_many(:crawl_segments) }
it { should have_db_column(:crawl_name).of_type(:string) }
describe '#has_segments?' do
- let(:crawl_name) { 'CC-MAIN-2013-20' }
+ let(:crawl_name) { 'CC-MAIN-2014-49' }
subject { Elasticrawl::Crawl.new(:crawl_name => crawl_name) }
it 'should have segments' do
expect(subject.has_segments?).to eq true
end
end
describe '#create_segments' do
- let(:crawl_name) { 'CC-MAIN-2013-20' }
+ let(:crawl_name) { 'CC-MAIN-2014-49' }
subject { Elasticrawl::Crawl.create(:crawl_name => crawl_name) }
before do
subject.create_segments
end
@@ -28,21 +28,25 @@
it 'should create correct # of segments' do
expect(subject.crawl_segments.count).to eq 3
end
it 'should create segment names' do
- expect(subject.crawl_segments[0].segment_name).to eq '1368696381249'
+ expect(subject.crawl_segments[0].segment_name).to eq '1416400372202.67'
end
it 'should create segment s3 uris' do
expect(subject.crawl_segments[0].segment_s3_uri).to eq \
- 's3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2013-20/segments/1368696381249/'
+ 's3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2014-49/segments/1416400372202.67/'
end
+
+ it 'should set file counts' do
+ expect(subject.crawl_segments[0].file_count).to eq 3
+ end
end
describe '#next_segments' do
- let(:crawl_name) { 'CC-MAIN-2013-20' }
+ let(:crawl_name) { 'CC-MAIN-2014-49' }
subject { Elasticrawl::Crawl.create(:crawl_name => crawl_name) }
before do
subject.create_segments
end
@@ -50,24 +54,24 @@
it 'should return all segments' do
crawl_segments = subject.next_segments
expect(crawl_segments.count).to eq 3
expect(crawl_segments[0].crawl.crawl_name).to eq crawl_name
- expect(crawl_segments[0].segment_name).to eq '1368696381249'
+ expect(crawl_segments[0].segment_name).to eq '1416400372202.67'
end
it 'should return first # segments' do
crawl_segments = subject.next_segments(2)
expect(crawl_segments.count).to eq 2
expect(crawl_segments[0].crawl.crawl_name).to eq crawl_name
- expect(crawl_segments[0].segment_name).to eq '1368696381249'
+ expect(crawl_segments[0].segment_name).to eq '1416400372202.67'
end
end
describe '#select_segments' do
- let(:crawl_name) { 'CC-MAIN-2013-20' }
+ let(:crawl_name) { 'CC-MAIN-2014-49' }
subject { Elasticrawl::Crawl.create(:crawl_name => crawl_name) }
before do
subject.create_segments
end
@@ -78,27 +82,27 @@
expect(crawl_segments.count).to eq 0
end
it 'should select only segments in list' do
- segments_list = ['1368696381249', '1368696382185']
+ segments_list = ['1416400372202.67', '1416400372490.23']
crawl_segments = subject.select_segments(segments_list)
expect(crawl_segments.count).to eq 2
end
end
describe '#reset' do
- let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
+ let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
let(:job) { Elasticrawl::ParseJob.new }
let(:job_flow_id) { 'j-3QHDKKBT6VAIS' }
before do
crawl.create_segments
job.set_segments(crawl.crawl_segments[0..1])
- Elasticity::JobFlow.any_instance.stubs(:run).returns(job_flow_id)
+ allow_any_instance_of(Elasticity::JobFlow).to receive(:run).and_return(job_flow_id)
job.run
crawl.reset
end
@@ -107,30 +111,31 @@
expect(crawl.crawl_segments.count).to eq unparsed_segments
end
end
describe '.status' do
- let(:job_desc) { 'Crawl: CC-MAIN-2013-20 Segments: 2 Parsing: 5 files per segment' }
- let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2013-20') }
- let(:max_files) { 5 }
+ let(:job_desc) { 'Crawl: CC-MAIN-2014-49 Segments: 2 Parsing: 3 files per segment' }
+ let(:crawl) { Elasticrawl::Crawl.create(:crawl_name => 'CC-MAIN-2014-49') }
+ let(:max_files) { 3 }
let(:job) { Elasticrawl::ParseJob.new }
let(:job_flow_id) { 'j-3QHDKKBT6VAIS' }
before do
crawl.create_segments
job.set_segments(crawl.crawl_segments[0..1], max_files)
- Elasticity::JobFlow.any_instance.stubs(:run).returns(job_flow_id)
+ allow_any_instance_of(Elasticity::JobFlow).to receive(:run).and_return(job_flow_id)
job.run
end
it 'should display status of crawl segments' do
expect(Elasticrawl::Crawl.status.split("\n")[1]).to eq \
- 'CC-MAIN-2013-20 Segments: to parse 1, parsed 2, total 3'
+ 'CC-MAIN-2014-49 Segments: to parse 1, parsed 2, total 3'
end
it 'should display parse job desc' do
crawl_status = Elasticrawl::Crawl.status.split("\n")[4]
+
expect(crawl_status.include?(job.job_name)).to eq true
expect(crawl_status.include?(job.job_desc)).to eq true
end
end
end