spec/piglet_spec.rb in piglet-0.2.3 vs spec/piglet_spec.rb in piglet-0.2.4
- old
+ new
@@ -357,12 +357,140 @@
end
@interpreter.to_pig_latin.should match(/\w+ BY x INNER/)
@interpreter.to_pig_latin.should match(/\w+ BY y OUTER/)
end
end
+
+ describe 'STREAM' do
+ it 'outputs a STREAM statement with a command reference' do
+ output = @interpreter.to_pig_latin do
+ a = load('in')
+ b = a.stream(:swoosch)
+ store(b, 'out')
+ end
+ output.should match(/STREAM \w+ THROUGH swoosch/)
+ end
+
+ it 'outputs a STREAM statement with a command' do
+ output = @interpreter.to_pig_latin do
+ a = load('in')
+ b = a.stream(:command => 'swoosch')
+ store(b, 'out')
+ end
+ output.should match(/STREAM \w+ THROUGH `swoosch`/)
+ end
+
+ it 'outputs a STREAM statement with a schema' do
+ output = @interpreter.to_pig_latin do
+ a = load('in')
+ b = a.stream(:command => 'swoosch', :schema => [:a, :b])
+ store(b, 'out')
+ end
+ output.should match(/STREAM \w+ THROUGH `swoosch` AS \(a:bytearray, b:bytearray\)/)
+ end
+
+ it 'outputs a STREAM statement with many relations' do
+ output = @interpreter.to_pig_latin do
+ x = load('in1')
+ y = load('in2')
+ z = load('in3')
+ w = x.stream([x, y], :plink)
+ store(w, 'out')
+ end
+ output.should match(/STREAM \w+, \w+, \w+ THROUGH plink/)
+ end
+ end
end
+ context 'UDF statements:' do
+ describe 'DEFINE' do
+ it 'outputs a DEFINE with the correct alias and function name' do
+ output = @interpreter.to_pig_latin { define('plunk', :function => 'com.example.Plunk') }
+ output.should include('DEFINE plunk com.example.Plunk')
+ end
+
+ it 'outputs a DEFINE with the correct alias and command string' do
+ output = @interpreter.to_pig_latin { define('plunk', :command => 'plunk.rb') }
+ output.should include('DEFINE plunk `plunk.rb`')
+ end
+
+ it 'outputs a DEFINE with an INPUT definition' do
+ output = @interpreter.to_pig_latin do
+ define('plunk', :command => 'plunk.rb', :input => :stdin)
+ end
+ output.should include('DEFINE plunk `plunk.rb` INPUT(stdin)')
+ end
+
+ it 'outputs a DEFINE with an OUTPUT definition' do
+ output = @interpreter.to_pig_latin do
+ define('plunk', :command => 'plunk.rb', :output => :stdout)
+ end
+ output.should include('DEFINE plunk `plunk.rb` OUTPUT(stdout)')
+ end
+
+ it 'outputs a DEFINE with a SHIP definition with one path' do
+ output = @interpreter.to_pig_latin do
+ define('plunk', :command => 'plunk.rb', :ship => 'path/to/somewhere')
+ end
+ output.should include('DEFINE plunk `plunk.rb` SHIP(\'path/to/somewhere\')')
+ end
+
+ it 'outputs a DEFINE with a SHIP definition with may paths' do
+ output = @interpreter.to_pig_latin do
+ define('plunk', :command => 'plunk.rb', :ship => ['path/to/somewhere', 'and/to/somewhere/else'])
+ end
+ output.should include('DEFINE plunk `plunk.rb` SHIP(\'path/to/somewhere\', \'and/to/somewhere/else\')')
+ end
+
+ it 'outputs a DEFINE with a CACHE definition with one path description' do
+ output = @interpreter.to_pig_latin do
+ define('plunk', :command => 'plunk.rb', :cache => '/input/data.gz#data.gz')
+ end
+ output.should include('DEFINE plunk `plunk.rb` CACHE(\'/input/data.gz#data.gz\')')
+ end
+
+ it 'outputs a DEFINE with a CACHE definition with may path descriptions' do
+ output = @interpreter.to_pig_latin do
+ define('plunk', :command => 'plunk.rb', :cache => ['/input/data.gz#data.gz', '/mydir/mydata.txt#mydata.txt'])
+ end
+ output.should include('DEFINE plunk `plunk.rb` CACHE(\'/input/data.gz#data.gz\', \'/mydir/mydata.txt#mydata.txt\')')
+ end
+
+ it 'outputs a DEFINE with with a somewhat complex INPUT definition' do
+ output = @interpreter.to_pig_latin do
+ define('plunk', :command => 'plunk.rb', :input => {:from => 'some/path', :using => :pig_storage})
+ end
+ output.should include('DEFINE plunk `plunk.rb` INPUT(\'some/path\' USING PigStorage)')
+ end
+
+ it 'outputs a DEFINE with with really complex options' do
+ output = @interpreter.to_pig_latin do
+ define('plunk', :command => 'plunk.rb',
+ :input => [
+ {:from => 'some/path', :using => :pig_storage},
+ {:from => :stdin, :using => 'HelloWorld(\'test\')'}
+ ],
+ :output => [
+ {:to => 'some/other/path', :using => :bin_storage},
+ {:to => :stdout, :using => 'SomeOtherMechanism()'}
+ ],
+ :ship => 'to/here',
+ :cache => ['first', 'second', 'third']
+ )
+ end
+ output.should include('DEFINE plunk `plunk.rb` INPUT(\'some/path\' USING PigStorage, stdin USING HelloWorld(\'test\')) OUTPUT(\'some/other/path\' USING BinStorage, stdout USING SomeOtherMechanism()) SHIP(\'to/here\') CACHE(\'first\', \'second\', \'third\')')
+ end
+ end
+
+ describe 'REGISTER' do
+ it 'outputs a REGISTER statement with the path to the specified JAR' do
+ output = @interpreter.to_pig_latin { register('path/to/lib.jar') }
+ output.should include('REGISTER path/to/lib.jar')
+ end
+ end
+ end
+
context 'aliasing & multiple statements' do
it 'aliases the loaded relation and uses the same alias in the STORE statement' do
@interpreter.interpret { store(load('in'), 'out') }
@interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\nSTORE \1 INTO 'out';/)
end
@@ -701,9 +829,21 @@
relation2 = relation1.foreach { |r| [literal(3.14)] }
throw :schema, relation2.schema
end
end
schema.field_type(0).should eql(:double)
+ end
+
+ it 'knows the schema of a relation streamed through a command (if there\'s a schema)' do
+ schema = catch(:schema) do
+ @interpreter.interpret do
+ relation1 = load('in1', :schema => [[:a, :float], [:b, :int]])
+ relation2 = relation1.stream(:command => 'command', :schema => [[:x, :chararray]])
+ throw :schema, relation2.schema
+ end
+ end
+ schema.field_names.should eql([:x])
+ schema.field_type(:x).should eql(:chararray)
end
end
end