spec/piglet_spec.rb in piglet-0.2.3 vs spec/piglet_spec.rb in piglet-0.2.4

- old
+ new

# Collapsed unified-diff hunks for spec/piglet_spec.rb; a '+' marks an added line,
# unmarked text is unchanged context. The context at the edges of each hunk
# (leading/trailing `end`s) belongs to definitions that are not fully visible here.
#
# Hunk 1 (@@ -357,12 +357,140 @@) adds:
#   * describe 'STREAM' - four examples asserting the generated Pig Latin for
#     `stream`: a symbol argument (emitted bare, `THROUGH swoosch`), a
#     :command option (emitted in backticks), a :command plus :schema
#     (expects `AS (a:bytearray, b:bytearray)`), and several relations
#     streamed together (`STREAM x, y, z THROUGH plink` shape).
#   * context 'UDF statements:' with
#       - describe 'DEFINE' - ten examples covering :function, :command,
#         :input, :output, :ship (one path / array of paths), :cache (one
#         description / array), a hash-form :input with :from/:using, and a
#         combined case with array-form :input/:output plus :ship and :cache.
#       - describe 'REGISTER' - one example expecting `REGISTER path/to/lib.jar`.
#   The existing context 'aliasing & multiple statements' follows.
#
# Hunk 2 (@@ -701,9 +829,21 @@) adds one example that throws the schema of a
# relation produced by `stream(:command => ..., :schema => [[:x, :chararray]])`
# and expects field_names == [:x] and field_type(:x) == :chararray.
#
# NOTE(review): three example descriptions contain typos - "with may paths",
# "with may path descriptions" (presumably "many") and "with with" - left
# untouched here because they are runtime strings recorded by the diff.
@@ -357,12 +357,140 @@ end @interpreter.to_pig_latin.should match(/\w+ BY x INNER/) @interpreter.to_pig_latin.should match(/\w+ BY y OUTER/) end end + + describe 'STREAM' do + it 'outputs a STREAM statement with a command reference' do + output = @interpreter.to_pig_latin do + a = load('in') + b = a.stream(:swoosch) + store(b, 'out') + end + output.should match(/STREAM \w+ THROUGH swoosch/) + end + + it 'outputs a STREAM statement with a command' do + output = @interpreter.to_pig_latin do + a = load('in') + b = a.stream(:command => 'swoosch') + store(b, 'out') + end + output.should match(/STREAM \w+ THROUGH `swoosch`/) + end + + it 'outputs a STREAM statement with a schema' do + output = @interpreter.to_pig_latin do + a = load('in') + b = a.stream(:command => 'swoosch', :schema => [:a, :b]) + store(b, 'out') + end + output.should match(/STREAM \w+ THROUGH `swoosch` AS \(a:bytearray, b:bytearray\)/) + end + + it 'outputs a STREAM statement with many relations' do + output = @interpreter.to_pig_latin do + x = load('in1') + y = load('in2') + z = load('in3') + w = x.stream([x, y], :plink) + store(w, 'out') + end + output.should match(/STREAM \w+, \w+, \w+ THROUGH plink/) + end + end end + context 'UDF statements:' do + describe 'DEFINE' do + it 'outputs a DEFINE with the correct alias and function name' do + output = @interpreter.to_pig_latin { define('plunk', :function => 'com.example.Plunk') } + output.should include('DEFINE plunk com.example.Plunk') + end + + it 'outputs a DEFINE with the correct alias and command string' do + output = @interpreter.to_pig_latin { define('plunk', :command => 'plunk.rb') } + output.should include('DEFINE plunk `plunk.rb`') + end + + it 'outputs a DEFINE with an INPUT definition' do + output = @interpreter.to_pig_latin do + define('plunk', :command => 'plunk.rb', :input => :stdin) + end + output.should include('DEFINE plunk `plunk.rb` INPUT(stdin)') + end + + it 'outputs a DEFINE with an OUTPUT definition' do + output = 
@interpreter.to_pig_latin do + define('plunk', :command => 'plunk.rb', :output => :stdout) + end + output.should include('DEFINE plunk `plunk.rb` OUTPUT(stdout)') + end + + it 'outputs a DEFINE with a SHIP definition with one path' do + output = @interpreter.to_pig_latin do + define('plunk', :command => 'plunk.rb', :ship => 'path/to/somewhere') + end + output.should include('DEFINE plunk `plunk.rb` SHIP(\'path/to/somewhere\')') + end + + it 'outputs a DEFINE with a SHIP definition with may paths' do + output = @interpreter.to_pig_latin do + define('plunk', :command => 'plunk.rb', :ship => ['path/to/somewhere', 'and/to/somewhere/else']) + end + output.should include('DEFINE plunk `plunk.rb` SHIP(\'path/to/somewhere\', \'and/to/somewhere/else\')') + end + + it 'outputs a DEFINE with a CACHE definition with one path description' do + output = @interpreter.to_pig_latin do + define('plunk', :command => 'plunk.rb', :cache => '/input/data.gz#data.gz') + end + output.should include('DEFINE plunk `plunk.rb` CACHE(\'/input/data.gz#data.gz\')') + end + + it 'outputs a DEFINE with a CACHE definition with may path descriptions' do + output = @interpreter.to_pig_latin do + define('plunk', :command => 'plunk.rb', :cache => ['/input/data.gz#data.gz', '/mydir/mydata.txt#mydata.txt']) + end + output.should include('DEFINE plunk `plunk.rb` CACHE(\'/input/data.gz#data.gz\', \'/mydir/mydata.txt#mydata.txt\')') + end + + it 'outputs a DEFINE with with a somewhat complex INPUT definition' do + output = @interpreter.to_pig_latin do + define('plunk', :command => 'plunk.rb', :input => {:from => 'some/path', :using => :pig_storage}) + end + output.should include('DEFINE plunk `plunk.rb` INPUT(\'some/path\' USING PigStorage)') + end + + it 'outputs a DEFINE with with really complex options' do + output = @interpreter.to_pig_latin do + define('plunk', :command => 'plunk.rb', + :input => [ + {:from => 'some/path', :using => :pig_storage}, + {:from => :stdin, :using => 'HelloWorld(\'test\')'} + 
], + :output => [ + {:to => 'some/other/path', :using => :bin_storage}, + {:to => :stdout, :using => 'SomeOtherMechanism()'} + ], + :ship => 'to/here', + :cache => ['first', 'second', 'third'] + ) + end + output.should include('DEFINE plunk `plunk.rb` INPUT(\'some/path\' USING PigStorage, stdin USING HelloWorld(\'test\')) OUTPUT(\'some/other/path\' USING BinStorage, stdout USING SomeOtherMechanism()) SHIP(\'to/here\') CACHE(\'first\', \'second\', \'third\')') + end + end + + describe 'REGISTER' do + it 'outputs a REGISTER statement with the path to the specified JAR' do + output = @interpreter.to_pig_latin { register('path/to/lib.jar') } + output.should include('REGISTER path/to/lib.jar') + end + end + end + + context 'aliasing & multiple statements' do it 'aliases the loaded relation and uses the same alias in the STORE statement' do @interpreter.interpret { store(load('in'), 'out') } @interpreter.to_pig_latin.should match(/(\w+) = LOAD 'in';\nSTORE \1 INTO 'out';/) end @@ -701,9 +829,21 @@ relation2 = relation1.foreach { |r| [literal(3.14)] } throw :schema, relation2.schema end end schema.field_type(0).should eql(:double) + end + + it 'knows the schema of a relation streamed through a command (if there\'s a schema)' do + schema = catch(:schema) do + @interpreter.interpret do + relation1 = load('in1', :schema => [[:a, :float], [:b, :int]]) + relation2 = relation1.stream(:command => 'command', :schema => [[:x, :chararray]]) + throw :schema, relation2.schema + end + end + schema.field_names.should eql([:x]) + schema.field_type(:x).should eql(:chararray) end end end