spec/piglet_spec.rb in piglet-0.2.5 vs spec/piglet_spec.rb in piglet-0.3.0

- old
+ new

@@ -241,65 +241,110 @@ end end describe 'FOREACH … GENERATE' do it 'outputs a FOREACH … GENERATE statement' do - @interpreter.interpret { dump(load('in').foreach { |r| :a }) } + @interpreter.interpret { dump(load('in').foreach { :a }) } @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a/) end it 'outputs a FOREACH … GENERATE statement with a list of fields' do - @interpreter.interpret { dump(load('in').foreach { |r| [:a, :b, :c] }) } + @interpreter.interpret { dump(load('in').foreach { [:a, :b, :c] }) } @interpreter.to_pig_latin.should match(/FOREACH \w+ GENERATE a, b, c/) end it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation' do - @interpreter.interpret { dump(load('in').foreach { |r| [r.a, r.b, r.c] }) } + @interpreter.interpret { dump(load('in').foreach { [a, b, c] }) } @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a, b, c/) end it 'outputs a FOREACH … GENERATE statement with fields resolved from the relation with positional syntax' do - @interpreter.interpret { dump(load('in').foreach { |r| [r[0], r[1], r[2]] }) } + @interpreter.interpret { dump(load('in').foreach { [self[0], self[1], self[2]] }) } @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE \$0, \$1, \$2/) end it 'outputs a FOREACH … GENERATE statement with aggregate functions applied to the fields' do - @interpreter.interpret { dump(load('in').foreach { |r| [r.a.max, r.b.min, r.c.avg] }) } + @interpreter.interpret { dump(load('in').foreach { [a.max, b.min, c.avg] }) } @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE MAX\(a\), MIN\(b\), AVG\(c\)/) end it 'outputs a FOREACH … GENERATE statement with fields that access inner fields' do - @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b, r.b.c, r.c.d] }) } + @interpreter.interpret { dump(load('in').foreach { [a.b, b.c, c.d] }) } @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b, b.c, c.d/) end it 'outputs a FOREACH … GENERATE statement that includes field aliasing' do - @interpreter.interpret { dump(load('in').foreach { |r| [r.a.b.as(:c), r.a.b.as(:d)] }) } + @interpreter.interpret { dump(load('in').foreach { [a.b.as(:c), a.b.as(:d)] }) } @interpreter.to_pig_latin.should match(/FOREACH (\w+) GENERATE a.b AS c, a.b AS d/) end end + + describe 'FOREACH ... { ... GENERATE }' do + it 'outputs a FOREACH ... { ... GENERATE } statement for named fields' do + @interpreter.interpret { dump(load('in').nested_foreach { [a, b, c] }) } + @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = a;\s+(\w+) = b;\s+(\w+) = c;\s+GENERATE \1, \2, \3;\s+\}/m) + end + + it 'outputs a FOREACH ... { ... GENERATE } statement for positional fields' do + @interpreter.interpret { dump(load('in').nested_foreach { [self[0], self[1], self[2]] }) } + @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = \$0\;\s+(\w+) = \$1\;\s+(\w+) = \$2\;\s+GENERATE \1, \2, \3\;\s+\}/m) + end + + it 'outputs a FOREACH ... { ... GENERATE } statement with aggregate functions applied to fields' do + @interpreter.interpret { dump(load('in').nested_foreach { [a.max, b.min, c.avg] }) } + @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = a;\s+(\w+) = MAX\(\1\);\s+(\w+) = b;\s+(\w+) = MIN\(\3\);\s+(\w+) = c;\s+(\w+) = AVG\(\5\);\s+GENERATE \2, \4, \6;\s+\}/m) + end + + it 'outputs a FOREACH ... { ... GENERATE } statement with fields that access inner fields' do + @interpreter.interpret { dump(load('in').nested_foreach { [a.b, b.c]}) } + @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = a;\s+(\w+) = \1.b;\s+(\w+) = b;\s+(\w+) = \3.c;\s+GENERATE \2, \4;\s+\}/m) + end + + it 'outputs a FOREACH ... { ... GENERATE } statement with user defined functions' do + @interpreter.interpret do + define('my_udf', :function => 'com.example.My') + dump(load('in').nested_foreach { [my_udf(a, 3, "hello")] }) + end + @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = a;\s+(\w+) = my_udf\(\1, 3, 'hello'\);\s+GENERATE \2;\s+\}/) + end + + it 'outputs a FOREACH ... { ... GENERATE } statement with bag methods' do + @interpreter.interpret { dump(load('in').nested_foreach { [self[1].distinct.sample(0.3).limit(5).order(:x).filter { x == 5 }] }) } + @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = \$1;\s+(\w+) = DISTINCT \1;\s+(\w+) = SAMPLE \2 0.3;\s+(\w+) = LIMIT \3 5;\s+(\w+) = ORDER \4 BY x;\s+(\w+) = FILTER \5 BY x == 5;\s+GENERATE \6;\s+\}/m) + end + + it 'outputs a FOREACH ... { ... GENERATE } statement with field aliasing' do + @interpreter.interpret { dump(load('in').nested_foreach { a = b.distinct; [a.as(:c)] }) } + @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = b;\s+(\w+) = DISTINCT \1;\s+GENERATE \2 AS c;\s+\}/) + end + + it 'outputs a FOREACH ... { ... GENERATE } statement with flatten' do + @interpreter.interpret { dump(load('in').nested_foreach { [a.flatten] }) } + @interpreter.to_pig_latin.should match(/FOREACH \w+ \{\s+(\w+) = a;\s+(\w+) = FLATTEN\(\1\);\s+GENERATE \2;\s+\}/m) + end + end describe 'FILTER' do it 'outputs a FILTER statement' do - @interpreter.interpret { dump(load('in').filter { |r| r.a == 3 }) } + @interpreter.interpret { dump(load('in').filter { a == 3 }) } @interpreter.to_pig_latin.should match(/FILTER \w+ BY a == 3/) end it 'outputs a FILTER statement with a complex test' do - @interpreter.interpret { dump(load('in').filter { |r| (r.a > r.b).and(r.c.ne(3)) }) } + @interpreter.interpret { dump(load('in').filter { (a > b).and(c.ne(3)) }) } @interpreter.to_pig_latin.should match(/FILTER \w+ BY \(a > b\) AND \(c != 3\)/) end end describe 'SPLIT' do it 'outputs a SPLIT statement' do @interpreter.interpret do - a, b = load('in').split { |r| [r.a >= 0, r.a < 0]} + a, b = load('in').split { [first >= 0, second < 0] } dump(a) dump(b) end - @interpreter.to_pig_latin.should match(/SPLIT \w+ INTO \w+ IF a >= 0, \w+ IF a < 0/) + @interpreter.to_pig_latin.should match(/SPLIT \w+ INTO \w+ IF first >= 0, \w+ IF second < 0/) end end describe 'ORDER' do it 'outputs an ORDER statement' do @@ -515,11 +560,11 @@ it 'makes the defined UDF available as a method in the interpreter scope, so that it can be used in a FOREACH and it\'s result renamed using AS' do output = @interpreter.to_pig_latin do define('my_udf', :function => 'com.example.My') a = load('in') - b = a.foreach { |r| [my_udf('foo', 3, 'hello \'world\'', r[0]).as(:bar)]} + b = a.foreach { [my_udf('foo', 3, 'hello \'world\'', self[0]).as(:bar)]} store(b, 'out') end output.should match(/FOREACH \w+ GENERATE my_udf\('foo', 3, 'hello \\'world\\'', \$0\) AS bar/) end end @@ -566,39 +611,39 @@ end context 'field expressions' do it 'parenthesizes expressions with different operators' do output = @interpreter.to_pig_latin do - store(load('in').filter { |r| r.x.and(r.y.or(r.z)).and(r.w) }, 'out') + store(load('in').filter { self.x.and(self.y.or(self.z)).and(self.w) }, 'out') end output.should include('x AND (y OR z) AND w') end it 'doesn\'t parenthesizes expressions with the same operator' do output = @interpreter.to_pig_latin do - store(load('in').filter { |r| r.x.and(r.y.and(r.z)).and(r.w) }, 'out') + store(load('in').filter { self.x.and(self.y.and(self.z)).and(self.w) }, 'out') end output.should include('x AND y AND z AND w') end it 'doesn\'t parenthesize function calls' do output = @interpreter.to_pig_latin do - store(load('in').foreach { |r| [r.x.max + r.y.min] }, 'out') + store(load('in').foreach { [self.x.max + self.y.min] }, 'out') end output.should include('MAX(x) + MIN(y)') end it 'doesn\'t parenthesize a suffix expression followed by an infix expression' do output = @interpreter.to_pig_latin do - store(load('in').foreach { |r| [r.x.null?.or(r.y)] }, 'out') + store(load('in').foreach { [self.x.null?.or(self.y)] }, 'out') end output.should include('x is null OR y') end it 'parenthesizes a prefix expression followed by an infix expression' do output = @interpreter.to_pig_latin do - store(load('in').foreach { |r| [r.x.not.and(r.y)] }, 'out') + store(load('in').foreach { [self.x.not.and(self.y)] }, 'out') end output.should include('(NOT x) AND y') end end @@ -613,19 +658,19 @@ [:impression, :int], [:engagement, :int], [:click_thru, :int] ]) %w(site size name).each do |dimension| - result = sessions.group(:ad_id, dimension).foreach do |r| + result = sessions.group(:ad_id, dimension).foreach do [ - r[0].ad_id.as(:ad_id), + self[0].ad_id.as(:ad_id), literal(dimension).as(:dimension), - r[0].field(dimension).as(:value), - r[1].exposure.sum.as(:exposures), - r[1].impression.sum.as(:impressions), - r[1].engagement.sum.as(:engagements), - r[1].click_thru.sum.as(:click_thrus) + self[0].field(dimension).as(:value), + self[1].exposure.sum.as(:exposures), + self[1].impression.sum.as(:impressions), + self[1].engagement.sum.as(:engagements), + self[1].click_thru.sum.as(:click_thrus) ] end store(result, "report_metrics-#{dimension}") end end @@ -798,11 +843,11 @@ it 'knows the schema of a relation projection' do schema = catch(:schema) do @interpreter.interpret do relation1 = load('in1', :schema => [[:a, :float], [:b, :int]]) - relation2 = relation1.foreach { |r| [r.a] } + relation2 = relation1.foreach { [a] } throw :schema, relation2.schema end end schema.field_names.should eql([:a]) schema.field_type(:a).should eql(:float) @@ -810,11 +855,11 @@ it 'knows the schema of a relation projection containing a call to MAX' do schema = catch(:schema) do @interpreter.interpret do relation1 = load('in1', :schema => [[:a, :float], [:b, :int]]) - relation2 = relation1.foreach { |r| [r.a.max] } + relation2 = relation1.foreach { [a.max] } throw :schema, relation2.schema end end schema.field_names.should eql([nil]) schema.field_type(0).should eql(:float) @@ -822,11 +867,11 @@ it 'knows the schema of a relation projection containing a call to COUNT' do schema = catch(:schema) do @interpreter.interpret do relation1 = load('in1', :schema => [[:a, :float], [:b, :int]]) - relation2 = relation1.foreach { |r| [r.a.count] } + relation2 = relation1.foreach { [a.count] } throw :schema, relation2.schema end end schema.field_names.should eql([nil]) schema.field_type(0).should eql(:long) @@ -834,43 +879,43 @@ it 'knows the schema of a relation projection containing a field rename' do schema = catch(:schema) do @interpreter.interpret do relation1 = load('in1', :schema => [[:a, :float], [:b, :int]]) - relation2 = relation1.foreach { |r| [r.a.count.as(:x)] } + relation2 = relation1.foreach { [a.count.as(:x)] } throw :schema, relation2.schema end end schema.field_names.should eql([:x]) end it 'knows the schema of a relation projection containing a literal string' do schema = catch(:schema) do @interpreter.interpret do relation1 = load('in1', :schema => [[:a, :float], [:b, :int]]) - relation2 = relation1.foreach { |r| [literal('blipp')] } + relation2 = relation1.foreach { [literal('blipp')] } throw :schema, relation2.schema end end schema.field_type(0).should eql(:chararray) end it 'knows the schema of a relation projection containing a literal integer' do schema = catch(:schema) do @interpreter.interpret do relation1 = load('in1', :schema => [[:a, :float], [:b, :int]]) - relation2 = relation1.foreach { |r| [literal(4)] } + relation2 = relation1.foreach { [literal(4)] } throw :schema, relation2.schema end end schema.field_type(0).should eql(:int) end it 'knows the schema of a relation projection containing a literal float' do schema = catch(:schema) do @interpreter.interpret do relation1 = load('in1', :schema => [[:a, :float], [:b, :int]]) - relation2 = relation1.foreach { |r| [literal(3.14)] } + relation2 = relation1.foreach { [literal(3.14)] } throw :schema, relation2.schema end end schema.field_type(0).should eql(:double) end